Rewrite convoluted code to avoid adding r0.
[gcc.git] / gcc / config / rs6000 / rs6000.c
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Support targetm.vectorize.builtin_mask_for_load.  */
GTY(()) tree altivec_builtin_mask_for_load;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built-in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

extern GTY(()) section *toc_section;
section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV     = 0x001,     /* Use divide estimate.  */
  RECIP_DF_DIV     = 0x002,
  RECIP_V4SF_DIV   = 0x004,
  RECIP_V2DF_DIV   = 0x008,

  RECIP_SF_RSQRT   = 0x010,     /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT   = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE       = 0,
  RECIP_ALL        = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                      | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                      | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;   /* option name */
  unsigned int mask;    /* mask bits to set */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
               | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
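
/* For illustration only -- a sketch of how the table above is meant to be
   read, not a definitive description of the option parser: an option such
   as -mrecip=divf,rsqrtd would match the "divf" and "rsqrtd" entries and
   OR (RECIP_SF_DIV | RECIP_V4SF_DIV) together with
   (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) into the reciprocal mask.  */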

/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,            /* default clone.  */
  CLONE_ISA_2_05,               /* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,               /* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,               /* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,               /* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;       /* rs6000_isa mask */
  const char *name;             /* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,                       "" },           /* Default options.  */
  { OPTION_MASK_CMPB,        "arch_2_05" },  /* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,     "arch_2_06" },  /* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,   "arch_2_07" },  /* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,   "arch_3_00" },  /* ISA 3.00 (power9).  */
};


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
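
/* These range checks rely on the declaration order of rs6000_reg_type
   above: GPR_REG_TYPE, VSX_REG_TYPE, ALTIVEC_REG_TYPE and FPR_REG_TYPE are
   contiguous.  For example, IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) is true,
   while IS_STD_REG_TYPE (SPR_REG_TYPE) and
   IS_FP_VECT_REG_TYPE (GPR_REG_TYPE) are both false.  */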


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;     /* Register class name.  */
  int reg;              /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",  FIRST_GPR_REGNO },          /* RELOAD_REG_GPR.  */
  { "Fpr",  FIRST_FPR_REGNO },          /* RELOAD_REG_FPR.  */
  { "VMX",  FIRST_ALTIVEC_REGNO },      /* RELOAD_REG_VMX.  */
  { "Any",  -1 },                       /* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET  0x80    /* quad offset is limited.  */
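
/* As a sketch of how these bits are meant to be tested (the helper
   functions below use the same idiom): to ask whether a mode supports
   reg+offset addressing in a GPR, one would check

     (reg_addr[mode].addr_mask[RELOAD_REG_GPR] & RELOAD_REG_OFFSET) != 0  */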

/* For each mode, the reload insns to use and the mask of valid addressing
   forms for each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;       /* INSN to reload for loading.  */
  enum insn_code reload_store;      /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;    /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;    /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;    /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;             /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
          != 0);
}
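
/* Illustrative example based on the comment above: for a mode in this
   class the displacement must be 16-byte aligned, so offsets such as 0,
   16 or 32 are usable with the quad (DQ) offset form, while an offset of
   8 is not.  */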

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
        {
          out_set = single_set (out_insn);
          if (!out_set)
            {
              out_pat = PATTERN (out_insn);
              if (GET_CODE (out_pat) == PARALLEL)
                {
                  for (i = 0; i < XVECLEN (out_pat, 0); i++)
                    {
                      out_exp = XVECEXP (out_pat, 0, i);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
        return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
        {
          in_exp = XVECEXP (in_pat, 0, i);
          if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
            continue;
          else if (GET_CODE (in_exp) != SET)
            return false;

          if (MEM_P (SET_DEST (in_exp)))
            {
              out_set = single_set (out_insn);
              if (!out_set)
                {
                  out_pat = PATTERN (out_insn);
                  if (GET_CODE (out_pat) != PARALLEL)
                    return false;
                  for (j = 0; j < XVECLEN (out_pat, 0); j++)
                    {
                      out_exp = XVECEXP (out_pat, 0, j);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  return store_data_bypass_p (out_insn, in_insn);
}

\f
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *,rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;


\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
  "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
  "%f8",  "%f9",  "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
  "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",    1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",   0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct",  0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,         0, 0, false, false, false, false, NULL, NULL }
};
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

1432 #undef TARGET_INIT_BUILTINS
1433 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1434 #undef TARGET_BUILTIN_DECL
1435 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1436
1437 #undef TARGET_FOLD_BUILTIN
1438 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1439 #undef TARGET_GIMPLE_FOLD_BUILTIN
1440 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1441
1442 #undef TARGET_EXPAND_BUILTIN
1443 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1444
1445 #undef TARGET_MANGLE_TYPE
1446 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1447
1448 #undef TARGET_INIT_LIBFUNCS
1449 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1450
1451 #if TARGET_MACHO
1452 #undef TARGET_BINDS_LOCAL_P
1453 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1454 #endif
1455
1456 #undef TARGET_MS_BITFIELD_LAYOUT_P
1457 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1458
1459 #undef TARGET_ASM_OUTPUT_MI_THUNK
1460 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1461
1462 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1463 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1464
1465 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1466 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1467
1468 #undef TARGET_REGISTER_MOVE_COST
1469 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1470 #undef TARGET_MEMORY_MOVE_COST
1471 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1472 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1473 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1474 rs6000_ira_change_pseudo_allocno_class
1475 #undef TARGET_CANNOT_COPY_INSN_P
1476 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1477 #undef TARGET_RTX_COSTS
1478 #define TARGET_RTX_COSTS rs6000_rtx_costs
1479 #undef TARGET_ADDRESS_COST
1480 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1481 #undef TARGET_INSN_COST
1482 #define TARGET_INSN_COST rs6000_insn_cost
1483
1484 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1485 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1486
1487 #undef TARGET_PROMOTE_FUNCTION_MODE
1488 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1489
1490 #undef TARGET_RETURN_IN_MEMORY
1491 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1492
1493 #undef TARGET_RETURN_IN_MSB
1494 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1495
1496 #undef TARGET_SETUP_INCOMING_VARARGS
1497 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1498
1499 /* Always strict argument naming on rs6000. */
1500 #undef TARGET_STRICT_ARGUMENT_NAMING
1501 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1502 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1503 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1504 #undef TARGET_SPLIT_COMPLEX_ARG
1505 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1506 #undef TARGET_MUST_PASS_IN_STACK
1507 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1508 #undef TARGET_PASS_BY_REFERENCE
1509 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1510 #undef TARGET_ARG_PARTIAL_BYTES
1511 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1512 #undef TARGET_FUNCTION_ARG_ADVANCE
1513 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1514 #undef TARGET_FUNCTION_ARG
1515 #define TARGET_FUNCTION_ARG rs6000_function_arg
1516 #undef TARGET_FUNCTION_ARG_PADDING
1517 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1518 #undef TARGET_FUNCTION_ARG_BOUNDARY
1519 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1520
1521 #undef TARGET_BUILD_BUILTIN_VA_LIST
1522 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1523
1524 #undef TARGET_EXPAND_BUILTIN_VA_START
1525 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1526
1527 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1528 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1529
1530 #undef TARGET_EH_RETURN_FILTER_MODE
1531 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1532
1533 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1534 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1535
1536 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1537 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1538
1539 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1540 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1541
1542 #undef TARGET_FLOATN_MODE
1543 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1544
1545 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1546 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1547
1548 #undef TARGET_MD_ASM_ADJUST
1549 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1550
1551 #undef TARGET_OPTION_OVERRIDE
1552 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1553
1554 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1555 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1556 rs6000_builtin_vectorized_function
1557
1558 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1559 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1560 rs6000_builtin_md_vectorized_function
1561
1562 #undef TARGET_STACK_PROTECT_GUARD
1563 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1564
1565 #if !TARGET_MACHO
1566 #undef TARGET_STACK_PROTECT_FAIL
1567 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1568 #endif
1569
1570 #ifdef HAVE_AS_TLS
1571 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1572 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1573 #endif
1574
1575 /* Use a 32-bit anchor range. This leads to sequences like:
1576
1577 addis tmp,anchor,high
1578 add dest,tmp,low
1579
1580 where tmp itself acts as an anchor, and can be shared between
1581 accesses to the same 64k page. */
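
/* The high/low split behind such sequences is the usual one (shown here
   only as an illustration, not tied to any particular pattern): with

     high = (offset + 0x8000) >> 16;
     low = (short) offset;

   we have (high << 16) + low == offset for any 32-bit offset, so the addis
   materializes HIGH (biased to compensate for the sign extension of LOW)
   and the final instruction supplies the sign-extended LOW. */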
1582 #undef TARGET_MIN_ANCHOR_OFFSET
1583 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1584 #undef TARGET_MAX_ANCHOR_OFFSET
1585 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1586 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1587 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1588 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1589 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1590
1591 #undef TARGET_BUILTIN_RECIPROCAL
1592 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1593
1594 #undef TARGET_SECONDARY_RELOAD
1595 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1596 #undef TARGET_SECONDARY_MEMORY_NEEDED
1597 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1598 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1599 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1600
1601 #undef TARGET_LEGITIMATE_ADDRESS_P
1602 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1603
1604 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1605 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1606
1607 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1608 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1609
1610 #undef TARGET_CAN_ELIMINATE
1611 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1612
1613 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1614 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1615
1616 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1617 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1618
1619 #undef TARGET_TRAMPOLINE_INIT
1620 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1621
1622 #undef TARGET_FUNCTION_VALUE
1623 #define TARGET_FUNCTION_VALUE rs6000_function_value
1624
1625 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1626 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1627
1628 #undef TARGET_OPTION_SAVE
1629 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1630
1631 #undef TARGET_OPTION_RESTORE
1632 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1633
1634 #undef TARGET_OPTION_PRINT
1635 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1636
1637 #undef TARGET_CAN_INLINE_P
1638 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1639
1640 #undef TARGET_SET_CURRENT_FUNCTION
1641 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1642
1643 #undef TARGET_LEGITIMATE_CONSTANT_P
1644 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1645
1646 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1647 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1648
1649 #undef TARGET_CAN_USE_DOLOOP_P
1650 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1651
1652 #undef TARGET_PREDICT_DOLOOP_P
1653 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1654
1655 #undef TARGET_HAVE_COUNT_REG_DECR_P
1656 #define TARGET_HAVE_COUNT_REG_DECR_P true
1657
1658 /* 1000000000 is infinite cost in IVOPTs. */
1659 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1660 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1661
1662 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1663 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1664
1665 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1666 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1667
1668 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1669 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1670 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1671 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1672 #undef TARGET_UNWIND_WORD_MODE
1673 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1674
1675 #undef TARGET_OFFLOAD_OPTIONS
1676 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1677
1678 #undef TARGET_C_MODE_FOR_SUFFIX
1679 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1680
1681 #undef TARGET_INVALID_BINARY_OP
1682 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1683
1684 #undef TARGET_OPTAB_SUPPORTED_P
1685 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1686
1687 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1688 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1689
1690 #undef TARGET_COMPARE_VERSION_PRIORITY
1691 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1692
1693 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1694 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1695 rs6000_generate_version_dispatcher_body
1696
1697 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1698 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1699 rs6000_get_function_versions_dispatcher
1700
1701 #undef TARGET_OPTION_FUNCTION_VERSIONS
1702 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1703
1704 #undef TARGET_HARD_REGNO_NREGS
1705 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1706 #undef TARGET_HARD_REGNO_MODE_OK
1707 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1708
1709 #undef TARGET_MODES_TIEABLE_P
1710 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1711
1712 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1713 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1714 rs6000_hard_regno_call_part_clobbered
1715
1716 #undef TARGET_SLOW_UNALIGNED_ACCESS
1717 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1718
1719 #undef TARGET_CAN_CHANGE_MODE_CLASS
1720 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1721
1722 #undef TARGET_CONSTANT_ALIGNMENT
1723 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1724
1725 #undef TARGET_STARTING_FRAME_OFFSET
1726 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1727
1728 #if TARGET_ELF && RS6000_WEAK
1729 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1730 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1731 #endif
1732
1733 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1734 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1735
1736 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1737 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1738 \f
1739
1740 /* Processor table. */
1741 struct rs6000_ptt
1742 {
1743 const char *const name; /* Canonical processor name. */
1744 const enum processor_type processor; /* Processor type enum value. */
1745 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1746 };
1747
1748 static struct rs6000_ptt const processor_target_table[] =
1749 {
1750 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1751 #include "rs6000-cpus.def"
1752 #undef RS6000_CPU
1753 };
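
/* Each RS6000_CPU line in rs6000-cpus.def expands to one initializer above;
   for instance, a (hypothetical) entry RS6000_CPU ("power9", PROCESSOR_POWER9,
   MASKS) becomes { "power9", PROCESSOR_POWER9, MASKS }. */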
1754
1755 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1756 name is invalid. */
1757
1758 static int
1759 rs6000_cpu_name_lookup (const char *name)
1760 {
1761 size_t i;
1762
1763 if (name != NULL)
1764 {
1765 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1766 if (! strcmp (name, processor_target_table[i].name))
1767 return (int)i;
1768 }
1769
1770 return -1;
1771 }
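
/* Example uses: rs6000_cpu_name_lookup ("power9") returns the table index of
   that entry, assuming rs6000-cpus.def defines it; a NULL pointer or an
   unknown name returns -1. */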
1772
1773 \f
1774 /* Return number of consecutive hard regs needed starting at reg REGNO
1775 to hold something of mode MODE.
1776 This is ordinarily the length in words of a value of mode MODE
1777 but can be less for certain modes in special long registers.
1778
1779 POWER and PowerPC GPRs hold 32 bits worth;
1780 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1781
1782 static int
1783 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1784 {
1785 unsigned HOST_WIDE_INT reg_size;
1786
1787 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1788 128-bit floating point that can go in vector registers, which has VSX
1789 memory addressing. */
1790 if (FP_REGNO_P (regno))
1791 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1792 ? UNITS_PER_VSX_WORD
1793 : UNITS_PER_FP_WORD);
1794
1795 else if (ALTIVEC_REGNO_P (regno))
1796 reg_size = UNITS_PER_ALTIVEC_WORD;
1797
1798 else
1799 reg_size = UNITS_PER_WORD;
1800
1801 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1802 }
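
/* As a worked example of the rounding above: an 8-byte DFmode value needs
   (8 + 4 - 1) / 4 = 2 registers when reg_size is 4 (32-bit GPRs), but only
   (8 + 8 - 1) / 8 = 1 register when reg_size is 8 (FPRs or 64-bit GPRs). */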
1803
1804 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1805 MODE. */
1806 static int
1807 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1808 {
1809 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1810
1811 if (COMPLEX_MODE_P (mode))
1812 mode = GET_MODE_INNER (mode);
1813
1814 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1815 register pairs, and we use PTImode where we need to deal with quad word
1816 memory operations. Don't allow quad words in the argument or frame
1817 pointer registers, just registers 0..31. */
1818 if (mode == PTImode)
1819 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1820 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1821 && ((regno & 1) == 0));
1822
1823 /* VSX registers that overlap the FPR registers are larger than in non-VSX
1824 implementations. Don't allow an item to be split between an FP register
1825 and an Altivec register. Allow TImode in all VSX registers if the user
1826 asked for it. */
1827 if (TARGET_VSX && VSX_REGNO_P (regno)
1828 && (VECTOR_MEM_VSX_P (mode)
1829 || FLOAT128_VECTOR_P (mode)
1830 || reg_addr[mode].scalar_in_vmx_p
1831 || mode == TImode
1832 || (TARGET_VADDUQM && mode == V1TImode)))
1833 {
1834 if (FP_REGNO_P (regno))
1835 return FP_REGNO_P (last_regno);
1836
1837 if (ALTIVEC_REGNO_P (regno))
1838 {
1839 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1840 return 0;
1841
1842 return ALTIVEC_REGNO_P (last_regno);
1843 }
1844 }
1845
1846 /* The GPRs can hold any mode, but values bigger than one register
1847 cannot go past R31. */
1848 if (INT_REGNO_P (regno))
1849 return INT_REGNO_P (last_regno);
1850
1851 /* The float registers (except for VSX vector modes) can only hold floating
1852 modes and DImode. */
1853 if (FP_REGNO_P (regno))
1854 {
1855 if (FLOAT128_VECTOR_P (mode))
1856 return false;
1857
1858 if (SCALAR_FLOAT_MODE_P (mode)
1859 && (mode != TDmode || (regno % 2) == 0)
1860 && FP_REGNO_P (last_regno))
1861 return 1;
1862
1863 if (GET_MODE_CLASS (mode) == MODE_INT)
1864 {
1865 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1866 return 1;
1867
1868 if (TARGET_P8_VECTOR && (mode == SImode))
1869 return 1;
1870
1871 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1872 return 1;
1873 }
1874
1875 return 0;
1876 }
1877
1878 /* The CR registers can only hold CC modes. */
1879 if (CR_REGNO_P (regno))
1880 return GET_MODE_CLASS (mode) == MODE_CC;
1881
1882 if (CA_REGNO_P (regno))
1883 return mode == Pmode || mode == SImode;
1884
1885 /* AltiVec modes can only go in AltiVec registers. */
1886 if (ALTIVEC_REGNO_P (regno))
1887 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1888 || mode == V1TImode);
1889
1890 /* We cannot put non-VSX TImode or PTImode anywhere except in a general
1891 register, and it must be able to fit within the register set. */
1892
1893 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1894 }
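
/* Some consequences of the checks above, as examples: TDmode is only
   accepted in an even-numbered FPR; SImode is additionally allowed in FPRs
   when TARGET_P8_VECTOR is set; and PTImode is rejected for any
   odd-numbered GPR. */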
1895
1896 /* Implement TARGET_HARD_REGNO_NREGS. */
1897
1898 static unsigned int
1899 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1900 {
1901 return rs6000_hard_regno_nregs[mode][regno];
1902 }
1903
1904 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1905
1906 static bool
1907 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1908 {
1909 return rs6000_hard_regno_mode_ok_p[mode][regno];
1910 }
1911
1912 /* Implement TARGET_MODES_TIEABLE_P.
1913
1914 PTImode cannot tie with other modes because PTImode is restricted to even
1915 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1916 57744).
1917
1918 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1919 128-bit floating point on VSX systems ties with other vectors. */
1920
1921 static bool
1922 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1923 {
1924 if (mode1 == PTImode)
1925 return mode2 == PTImode;
1926 if (mode2 == PTImode)
1927 return false;
1928
1929 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1930 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1931 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1932 return false;
1933
1934 if (SCALAR_FLOAT_MODE_P (mode1))
1935 return SCALAR_FLOAT_MODE_P (mode2);
1936 if (SCALAR_FLOAT_MODE_P (mode2))
1937 return false;
1938
1939 if (GET_MODE_CLASS (mode1) == MODE_CC)
1940 return GET_MODE_CLASS (mode2) == MODE_CC;
1941 if (GET_MODE_CLASS (mode2) == MODE_CC)
1942 return false;
1943
1944 return true;
1945 }
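
/* A few concrete pairs implied by the ordering above: SFmode ties with
   DFmode (both scalar float), V4SImode ties with V2DFmode (both
   Altivec/VSX vectors), and TImode never ties with PTImode. */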
1946
1947 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1948
1949 static bool
1950 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1951 machine_mode mode)
1952 {
1953 if (TARGET_32BIT
1954 && TARGET_POWERPC64
1955 && GET_MODE_SIZE (mode) > 4
1956 && INT_REGNO_P (regno))
1957 return true;
1958
1959 if (TARGET_VSX
1960 && FP_REGNO_P (regno)
1961 && GET_MODE_SIZE (mode) > 8
1962 && !FLOAT128_2REG_P (mode))
1963 return true;
1964
1965 return false;
1966 }
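
/* For example, on a 32-bit ABI with -mpowerpc64, a DImode value living in a
   GPR is only partly preserved across calls (the ABI saves just the low 32
   bits of each nonvolatile GPR), which is the first case above. */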
1967
1968 /* Print interesting facts about registers. */
1969 static void
1970 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1971 {
1972 int r, m;
1973
1974 for (r = first_regno; r <= last_regno; ++r)
1975 {
1976 const char *comma = "";
1977 int len;
1978
1979 if (first_regno == last_regno)
1980 fprintf (stderr, "%s:\t", reg_name);
1981 else
1982 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1983
1984 len = 8;
1985 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1986 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1987 {
1988 if (len > 70)
1989 {
1990 fprintf (stderr, ",\n\t");
1991 len = 8;
1992 comma = "";
1993 }
1994
1995 if (rs6000_hard_regno_nregs[m][r] > 1)
1996 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1997 rs6000_hard_regno_nregs[m][r]);
1998 else
1999 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2000
2001 comma = ", ";
2002 }
2003
2004 if (call_used_or_fixed_reg_p (r))
2005 {
2006 if (len > 70)
2007 {
2008 fprintf (stderr, ",\n\t");
2009 len = 8;
2010 comma = "";
2011 }
2012
2013 len += fprintf (stderr, "%s%s", comma, "call-used");
2014 comma = ", ";
2015 }
2016
2017 if (fixed_regs[r])
2018 {
2019 if (len > 70)
2020 {
2021 fprintf (stderr, ",\n\t");
2022 len = 8;
2023 comma = "";
2024 }
2025
2026 len += fprintf (stderr, "%s%s", comma, "fixed");
2027 comma = ", ";
2028 }
2029
2030 if (len > 70)
2031 {
2032 fprintf (stderr, ",\n\t");
2033 comma = "";
2034 }
2035
2036 len += fprintf (stderr, "%sreg-class = %s", comma,
2037 reg_class_names[(int)rs6000_regno_regclass[r]]);
2038 comma = ", ";
2039
2040 if (len > 70)
2041 {
2042 fprintf (stderr, ",\n\t");
2043 comma = "";
2044 }
2045
2046 fprintf (stderr, "%sregno = %d\n", comma, r);
2047 }
2048 }
2049
2050 static const char *
2051 rs6000_debug_vector_unit (enum rs6000_vector v)
2052 {
2053 const char *ret;
2054
2055 switch (v)
2056 {
2057 case VECTOR_NONE: ret = "none"; break;
2058 case VECTOR_ALTIVEC: ret = "altivec"; break;
2059 case VECTOR_VSX: ret = "vsx"; break;
2060 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2061 default: ret = "unknown"; break;
2062 }
2063
2064 return ret;
2065 }
2066
2067 /* Inner function printing just the address mask for a particular reload
2068 register class. */
2069 DEBUG_FUNCTION char *
2070 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2071 {
2072 static char ret[8];
2073 char *p = ret;
2074
2075 if ((mask & RELOAD_REG_VALID) != 0)
2076 *p++ = 'v';
2077 else if (keep_spaces)
2078 *p++ = ' ';
2079
2080 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2081 *p++ = 'm';
2082 else if (keep_spaces)
2083 *p++ = ' ';
2084
2085 if ((mask & RELOAD_REG_INDEXED) != 0)
2086 *p++ = 'i';
2087 else if (keep_spaces)
2088 *p++ = ' ';
2089
2090 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2091 *p++ = 'O';
2092 else if ((mask & RELOAD_REG_OFFSET) != 0)
2093 *p++ = 'o';
2094 else if (keep_spaces)
2095 *p++ = ' ';
2096
2097 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2098 *p++ = '+';
2099 else if (keep_spaces)
2100 *p++ = ' ';
2101
2102 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2103 *p++ = '+';
2104 else if (keep_spaces)
2105 *p++ = ' ';
2106
2107 if ((mask & RELOAD_REG_AND_M16) != 0)
2108 *p++ = '&';
2109 else if (keep_spaces)
2110 *p++ = ' ';
2111
2112 *p = '\0';
2113
2114 return ret;
2115 }
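
/* For instance, a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED,
   RELOAD_REG_OFFSET, RELOAD_REG_PRE_INCDEC, and RELOAD_REG_PRE_MODIFY set
   prints as "v io++ " with KEEP_SPACES and as "vio++" without. */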
2116
2117 /* Print the address masks in a human readable fashion. */
2118 DEBUG_FUNCTION void
2119 rs6000_debug_print_mode (ssize_t m)
2120 {
2121 ssize_t rc;
2122 int spaces = 0;
2123
2124 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2125 for (rc = 0; rc < N_RELOAD_REG; rc++)
2126 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2127 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2128
2129 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2130 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2131 {
2132 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2133 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2134 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2135 spaces = 0;
2136 }
2137 else
2138 spaces += sizeof (" Reload=sl") - 1;
2139
2140 if (reg_addr[m].scalar_in_vmx_p)
2141 {
2142 fprintf (stderr, "%*s Upper=y", spaces, "");
2143 spaces = 0;
2144 }
2145 else
2146 spaces += sizeof (" Upper=y") - 1;
2147
2148 if (rs6000_vector_unit[m] != VECTOR_NONE
2149 || rs6000_vector_mem[m] != VECTOR_NONE)
2150 {
2151 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2152 spaces, "",
2153 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2154 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2155 }
2156
2157 fputs ("\n", stderr);
2158 }
2159
2160 #define DEBUG_FMT_ID "%-32s= "
2161 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2162 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2163 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2164
2165 /* Print various interesting information with -mdebug=reg. */
2166 static void
2167 rs6000_debug_reg_global (void)
2168 {
2169 static const char *const tf[2] = { "false", "true" };
2170 const char *nl = (const char *)0;
2171 int m;
2172 size_t m1, m2, v;
2173 char costly_num[20];
2174 char nop_num[20];
2175 char flags_buffer[40];
2176 const char *costly_str;
2177 const char *nop_str;
2178 const char *trace_str;
2179 const char *abi_str;
2180 const char *cmodel_str;
2181 struct cl_target_option cl_opts;
2182
2183 /* Modes we want tieable information on. */
2184 static const machine_mode print_tieable_modes[] = {
2185 QImode,
2186 HImode,
2187 SImode,
2188 DImode,
2189 TImode,
2190 PTImode,
2191 SFmode,
2192 DFmode,
2193 TFmode,
2194 IFmode,
2195 KFmode,
2196 SDmode,
2197 DDmode,
2198 TDmode,
2199 V16QImode,
2200 V8HImode,
2201 V4SImode,
2202 V2DImode,
2203 V1TImode,
2204 V32QImode,
2205 V16HImode,
2206 V8SImode,
2207 V4DImode,
2208 V2TImode,
2209 V4SFmode,
2210 V2DFmode,
2211 V8SFmode,
2212 V4DFmode,
2213 CCmode,
2214 CCUNSmode,
2215 CCEQmode,
2216 };
2217
2218 /* Virtual regs we are interested in. */
2219 static const struct {
2220 int regno; /* register number. */
2221 const char *name; /* register name. */
2222 } virtual_regs[] = {
2223 { STACK_POINTER_REGNUM, "stack pointer:" },
2224 { TOC_REGNUM, "toc: " },
2225 { STATIC_CHAIN_REGNUM, "static chain: " },
2226 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2227 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2228 { ARG_POINTER_REGNUM, "arg pointer: " },
2229 { FRAME_POINTER_REGNUM, "frame pointer:" },
2230 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2231 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2232 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2233 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2234 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2235 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2236 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2237 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2238 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2239 };
2240
2241 fputs ("\nHard register information:\n", stderr);
2242 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2243 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2244 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2245 LAST_ALTIVEC_REGNO,
2246 "vs");
2247 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2248 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2249 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2250 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2251 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2252 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2253
2254 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2255 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2256 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2257
2258 fprintf (stderr,
2259 "\n"
2260 "d reg_class = %s\n"
2261 "f reg_class = %s\n"
2262 "v reg_class = %s\n"
2263 "wa reg_class = %s\n"
2264 "we reg_class = %s\n"
2265 "wr reg_class = %s\n"
2266 "wx reg_class = %s\n"
2267 "wA reg_class = %s\n"
2268 "\n",
2269 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2270 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2271 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2272 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2273 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2274 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2275 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2276 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2277
2278 nl = "\n";
2279 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2280 rs6000_debug_print_mode (m);
2281
2282 fputs ("\n", stderr);
2283
2284 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2285 {
2286 machine_mode mode1 = print_tieable_modes[m1];
2287 bool first_time = true;
2288
2289 nl = (const char *)0;
2290 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2291 {
2292 machine_mode mode2 = print_tieable_modes[m2];
2293 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2294 {
2295 if (first_time)
2296 {
2297 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2298 nl = "\n";
2299 first_time = false;
2300 }
2301
2302 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2303 }
2304 }
2305
2306 if (!first_time)
2307 fputs ("\n", stderr);
2308 }
2309
2310 if (nl)
2311 fputs (nl, stderr);
2312
2313 if (rs6000_recip_control)
2314 {
2315 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2316
2317 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2318 if (rs6000_recip_bits[m])
2319 {
2320 fprintf (stderr,
2321 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2322 GET_MODE_NAME (m),
2323 (RS6000_RECIP_AUTO_RE_P (m)
2324 ? "auto"
2325 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2326 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2327 ? "auto"
2328 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2329 }
2330
2331 fputs ("\n", stderr);
2332 }
2333
2334 if (rs6000_cpu_index >= 0)
2335 {
2336 const char *name = processor_target_table[rs6000_cpu_index].name;
2337 HOST_WIDE_INT flags
2338 = processor_target_table[rs6000_cpu_index].target_enable;
2339
2340 sprintf (flags_buffer, "-mcpu=%s flags", name);
2341 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2342 }
2343 else
2344 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2345
2346 if (rs6000_tune_index >= 0)
2347 {
2348 const char *name = processor_target_table[rs6000_tune_index].name;
2349 HOST_WIDE_INT flags
2350 = processor_target_table[rs6000_tune_index].target_enable;
2351
2352 sprintf (flags_buffer, "-mtune=%s flags", name);
2353 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2354 }
2355 else
2356 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2357
2358 cl_target_option_save (&cl_opts, &global_options);
2359 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2360 rs6000_isa_flags);
2361
2362 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2363 rs6000_isa_flags_explicit);
2364
2365 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2366 rs6000_builtin_mask);
2367
2368 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2369
2370 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2371 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2372
2373 switch (rs6000_sched_costly_dep)
2374 {
2375 case max_dep_latency:
2376 costly_str = "max_dep_latency";
2377 break;
2378
2379 case no_dep_costly:
2380 costly_str = "no_dep_costly";
2381 break;
2382
2383 case all_deps_costly:
2384 costly_str = "all_deps_costly";
2385 break;
2386
2387 case true_store_to_load_dep_costly:
2388 costly_str = "true_store_to_load_dep_costly";
2389 break;
2390
2391 case store_to_load_dep_costly:
2392 costly_str = "store_to_load_dep_costly";
2393 break;
2394
2395 default:
2396 costly_str = costly_num;
2397 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2398 break;
2399 }
2400
2401 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2402
2403 switch (rs6000_sched_insert_nops)
2404 {
2405 case sched_finish_regroup_exact:
2406 nop_str = "sched_finish_regroup_exact";
2407 break;
2408
2409 case sched_finish_pad_groups:
2410 nop_str = "sched_finish_pad_groups";
2411 break;
2412
2413 case sched_finish_none:
2414 nop_str = "sched_finish_none";
2415 break;
2416
2417 default:
2418 nop_str = nop_num;
2419 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2420 break;
2421 }
2422
2423 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2424
2425 switch (rs6000_sdata)
2426 {
2427 default:
2428 case SDATA_NONE:
2429 break;
2430
2431 case SDATA_DATA:
2432 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2433 break;
2434
2435 case SDATA_SYSV:
2436 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2437 break;
2438
2439 case SDATA_EABI:
2440 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2441 break;
2442
2443 }
2444
2445 switch (rs6000_traceback)
2446 {
2447 case traceback_default: trace_str = "default"; break;
2448 case traceback_none: trace_str = "none"; break;
2449 case traceback_part: trace_str = "part"; break;
2450 case traceback_full: trace_str = "full"; break;
2451 default: trace_str = "unknown"; break;
2452 }
2453
2454 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2455
2456 switch (rs6000_current_cmodel)
2457 {
2458 case CMODEL_SMALL: cmodel_str = "small"; break;
2459 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2460 case CMODEL_LARGE: cmodel_str = "large"; break;
2461 default: cmodel_str = "unknown"; break;
2462 }
2463
2464 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2465
2466 switch (rs6000_current_abi)
2467 {
2468 case ABI_NONE: abi_str = "none"; break;
2469 case ABI_AIX: abi_str = "aix"; break;
2470 case ABI_ELFv2: abi_str = "ELFv2"; break;
2471 case ABI_V4: abi_str = "V4"; break;
2472 case ABI_DARWIN: abi_str = "darwin"; break;
2473 default: abi_str = "unknown"; break;
2474 }
2475
2476 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2477
2478 if (rs6000_altivec_abi)
2479 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2480
2481 if (rs6000_darwin64_abi)
2482 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2483
2484 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2485 (TARGET_SOFT_FLOAT ? "true" : "false"));
2486
2487 if (TARGET_LINK_STACK)
2488 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2489
2490 if (TARGET_P8_FUSION)
2491 {
2492 char options[80];
2493
2494 strcpy (options, "power8");
2495 if (TARGET_P8_FUSION_SIGN)
2496 strcat (options, ", sign");
2497
2498 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2499 }
2500
2501 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2502 TARGET_SECURE_PLT ? "secure" : "bss");
2503 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2504 aix_struct_return ? "aix" : "sysv");
2505 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2506 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2507 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2508 tf[!!rs6000_align_branch_targets]);
2509 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2510 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2511 rs6000_long_double_type_size);
2512 if (rs6000_long_double_type_size > 64)
2513 {
2514 fprintf (stderr, DEBUG_FMT_S, "long double type",
2515 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2516 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2517 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2518 }
2519 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2520 (int)rs6000_sched_restricted_insns_priority);
2521 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2522 (int)END_BUILTINS);
2523 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2524 (int)RS6000_BUILTIN_COUNT);
2525
2526 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2527 (int)TARGET_FLOAT128_ENABLE_TYPE);
2528
2529 if (TARGET_VSX)
2530 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2531 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2532
2533 if (TARGET_DIRECT_MOVE_128)
2534 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2535 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2536 }
2537
2538 \f
2539 /* Update the addr mask bits in reg_addr to help secondary reload and the
2540 legitimate address support figure out the appropriate addressing to
2541 use. */
2542
2543 static void
2544 rs6000_setup_reg_addr_masks (void)
2545 {
2546 ssize_t rc, reg, m, nregs;
2547 addr_mask_type any_addr_mask, addr_mask;
2548
2549 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2550 {
2551 machine_mode m2 = (machine_mode) m;
2552 bool complex_p = false;
2553 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2554 size_t msize;
2555
2556 if (COMPLEX_MODE_P (m2))
2557 {
2558 complex_p = true;
2559 m2 = GET_MODE_INNER (m2);
2560 }
2561
2562 msize = GET_MODE_SIZE (m2);
2563
2564 /* SDmode is special in that we want to access it only via REG+REG
2565 addressing on power7 and above, since we want to use the LFIWZX and
2566 STFIWZX instructions to load it. */
2567 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2568
2569 any_addr_mask = 0;
2570 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2571 {
2572 addr_mask = 0;
2573 reg = reload_reg_map[rc].reg;
2574
2575 /* Can mode values go in the GPR/FPR/Altivec registers? */
2576 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2577 {
2578 bool small_int_vsx_p = (small_int_p
2579 && (rc == RELOAD_REG_FPR
2580 || rc == RELOAD_REG_VMX));
2581
2582 nregs = rs6000_hard_regno_nregs[m][reg];
2583 addr_mask |= RELOAD_REG_VALID;
2584
2585 /* Indicate if the mode takes more than 1 physical register. If
2586 it takes a single register, indicate it can do REG+REG
2587 addressing. Small integers in VSX registers can only do
2588 REG+REG addressing. */
2589 if (small_int_vsx_p)
2590 addr_mask |= RELOAD_REG_INDEXED;
2591 else if (nregs > 1 || m == BLKmode || complex_p)
2592 addr_mask |= RELOAD_REG_MULTIPLE;
2593 else
2594 addr_mask |= RELOAD_REG_INDEXED;
2595
2596 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2597 addressing. If we allow scalars into Altivec registers,
2598 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2599
2600 For VSX systems, we don't allow update addressing for
2601 DFmode/SFmode if those registers can go in both the
2602 traditional floating point registers and Altivec registers.
2603 The load/store instructions for the Altivec registers do not
2604 have update forms. If we allowed update addressing, it seems
2605 to break IV-OPT code using floating point if the index type is
2606 int instead of long (PR target/81550 and target/84042). */
2607
2608 if (TARGET_UPDATE
2609 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2610 && msize <= 8
2611 && !VECTOR_MODE_P (m2)
2612 && !FLOAT128_VECTOR_P (m2)
2613 && !complex_p
2614 && (m != E_DFmode || !TARGET_VSX)
2615 && (m != E_SFmode || !TARGET_P8_VECTOR)
2616 && !small_int_vsx_p)
2617 {
2618 addr_mask |= RELOAD_REG_PRE_INCDEC;
2619
2620 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2621 we don't allow PRE_MODIFY for some multi-register
2622 operations. */
2623 switch (m)
2624 {
2625 default:
2626 addr_mask |= RELOAD_REG_PRE_MODIFY;
2627 break;
2628
2629 case E_DImode:
2630 if (TARGET_POWERPC64)
2631 addr_mask |= RELOAD_REG_PRE_MODIFY;
2632 break;
2633
2634 case E_DFmode:
2635 case E_DDmode:
2636 if (TARGET_HARD_FLOAT)
2637 addr_mask |= RELOAD_REG_PRE_MODIFY;
2638 break;
2639 }
2640 }
2641 }
2642
2643 /* GPR and FPR registers can do REG+OFFSET addressing, except
2644 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2645 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2646 if ((addr_mask != 0) && !indexed_only_p
2647 && msize <= 8
2648 && (rc == RELOAD_REG_GPR
2649 || ((msize == 8 || m2 == SFmode)
2650 && (rc == RELOAD_REG_FPR
2651 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2652 addr_mask |= RELOAD_REG_OFFSET;
2653
2654 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2655 instructions are enabled. The offset for 128-bit VSX registers is
2656 only 12 bits. While GPRs can handle the full offset range, VSX
2657 registers can only handle the restricted range. */
2658 else if ((addr_mask != 0) && !indexed_only_p
2659 && msize == 16 && TARGET_P9_VECTOR
2660 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2661 || (m2 == TImode && TARGET_VSX)))
2662 {
2663 addr_mask |= RELOAD_REG_OFFSET;
2664 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2665 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2666 }
2667
2668 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2669 addressing on 128-bit types. */
2670 if (rc == RELOAD_REG_VMX && msize == 16
2671 && (addr_mask & RELOAD_REG_VALID) != 0)
2672 addr_mask |= RELOAD_REG_AND_M16;
2673
2674 reg_addr[m].addr_mask[rc] = addr_mask;
2675 any_addr_mask |= addr_mask;
2676 }
2677
2678 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2679 }
2680 }
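
/* A worked example of the loop above, assuming a 64-bit target with
   TARGET_UPDATE: DImode in the GPR reload class accumulates
   RELOAD_REG_VALID, RELOAD_REG_INDEXED (single register, neither BLKmode
   nor complex), RELOAD_REG_OFFSET, RELOAD_REG_PRE_INCDEC, and (since
   TARGET_POWERPC64 holds) RELOAD_REG_PRE_MODIFY, i.e. "v io++ " in the
   rs6000_debug_addr_mask encoding. */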
2681
2682 \f
2683 /* Initialize the various global tables that are based on register size. */
2684 static void
2685 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2686 {
2687 ssize_t r, m, c;
2688 int align64;
2689 int align32;
2690
2691 /* Precalculate REGNO_REG_CLASS. */
2692 rs6000_regno_regclass[0] = GENERAL_REGS;
2693 for (r = 1; r < 32; ++r)
2694 rs6000_regno_regclass[r] = BASE_REGS;
2695
2696 for (r = 32; r < 64; ++r)
2697 rs6000_regno_regclass[r] = FLOAT_REGS;
2698
2699 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2700 rs6000_regno_regclass[r] = NO_REGS;
2701
2702 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2703 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2704
2705 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2706 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2707 rs6000_regno_regclass[r] = CR_REGS;
2708
2709 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2710 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2711 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2712 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2713 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2714 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2715 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2716
2717 /* Precalculate register class to simpler reload register class. We don't
2718 need all of the register classes that are combinations of different
2719 classes, just the simple ones that have constraint letters. */
2720 for (c = 0; c < N_REG_CLASSES; c++)
2721 reg_class_to_reg_type[c] = NO_REG_TYPE;
2722
2723 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2724 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2725 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2726 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2727 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2728 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2729 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2730 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2731 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2732 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2733
2734 if (TARGET_VSX)
2735 {
2736 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2737 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2738 }
2739 else
2740 {
2741 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2742 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2743 }
2744
2745 /* Precalculate the valid memory formats as well as the vector information;
2746 this must be set up before the rs6000_hard_regno_nregs_internal calls
2747 below. */
2748 gcc_assert ((int)VECTOR_NONE == 0);
2749 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2750 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2751
2752 gcc_assert ((int)CODE_FOR_nothing == 0);
2753 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2754
2755 gcc_assert ((int)NO_REGS == 0);
2756 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2757
2758 /* The VSX hardware allows native alignment for vectors; control whether
2759 the compiler believes it can use native alignment or still uses 128-bit alignment. */
2760 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2761 {
2762 align64 = 64;
2763 align32 = 32;
2764 }
2765 else
2766 {
2767 align64 = 128;
2768 align32 = 128;
2769 }
2770
2771 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2772 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2773 if (TARGET_FLOAT128_TYPE)
2774 {
2775 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2776 rs6000_vector_align[KFmode] = 128;
2777
2778 if (FLOAT128_IEEE_P (TFmode))
2779 {
2780 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2781 rs6000_vector_align[TFmode] = 128;
2782 }
2783 }
2784
2785 /* V2DF mode, VSX only. */
2786 if (TARGET_VSX)
2787 {
2788 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2789 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2790 rs6000_vector_align[V2DFmode] = align64;
2791 }
2792
2793 /* V4SF mode, either VSX or Altivec. */
2794 if (TARGET_VSX)
2795 {
2796 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2797 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2798 rs6000_vector_align[V4SFmode] = align32;
2799 }
2800 else if (TARGET_ALTIVEC)
2801 {
2802 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2803 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2804 rs6000_vector_align[V4SFmode] = align32;
2805 }
2806
2807 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2808 and stores. */
2809 if (TARGET_ALTIVEC)
2810 {
2811 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2812 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2813 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2814 rs6000_vector_align[V4SImode] = align32;
2815 rs6000_vector_align[V8HImode] = align32;
2816 rs6000_vector_align[V16QImode] = align32;
2817
2818 if (TARGET_VSX)
2819 {
2820 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2821 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2822 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2823 }
2824 else
2825 {
2826 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2827 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2828 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2829 }
2830 }
2831
2832 /* V2DImode: full mode support depends on the ISA 2.07 vector mode. Allow it
2833 under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2834 if (TARGET_VSX)
2835 {
2836 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2837 rs6000_vector_unit[V2DImode]
2838 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2839 rs6000_vector_align[V2DImode] = align64;
2840
2841 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2842 rs6000_vector_unit[V1TImode]
2843 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2844 rs6000_vector_align[V1TImode] = 128;
2845 }
2846
2847 /* DFmode, see if we want to use the VSX unit. Memory is handled
2848 differently, so don't set rs6000_vector_mem. */
2849 if (TARGET_VSX)
2850 {
2851 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2852 rs6000_vector_align[DFmode] = 64;
2853 }
2854
2855 /* SFmode, see if we want to use the VSX unit. */
2856 if (TARGET_P8_VECTOR)
2857 {
2858 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2859 rs6000_vector_align[SFmode] = 32;
2860 }
2861
2862 /* Allow TImode in VSX register and set the VSX memory macros. */
2863 if (TARGET_VSX)
2864 {
2865 rs6000_vector_mem[TImode] = VECTOR_VSX;
2866 rs6000_vector_align[TImode] = align64;
2867 }
2868
2869 /* Register class constraints for the constraints that depend on compile
2870 switches. When the VSX code was added, different constraints were added
2871 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2872 of the VSX registers are used. The register classes for scalar floating
2873 point types are set based on whether we allow that type into the upper
2874 (Altivec) registers. GCC has register classes to target the Altivec
2875 registers for load/store operations, to select using a VSX memory
2876 operation instead of the traditional floating point operation. The
2877 constraints are:
2878
2879 d - Register class to use with traditional DFmode instructions.
2880 f - Register class to use with traditional SFmode instructions.
2881 v - Altivec register.
2882 wa - Any VSX register.
2883 wc - Reserved to represent individual CR bits (used in LLVM).
2884 wn - always NO_REGS.
2885 wr - GPR if 64-bit mode is permitted.
2886 wx - Float register if we can do 32-bit int stores. */
2887
2888 if (TARGET_HARD_FLOAT)
2889 {
2890 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2891 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2892 }
2893
2894 if (TARGET_VSX)
2895 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2896
2897 /* Add conditional constraints based on various options, to allow us to
2898 collapse multiple insn patterns. */
2899 if (TARGET_ALTIVEC)
2900 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2901
2902 if (TARGET_POWERPC64)
2903 {
2904 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2905 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2906 }
2907
2908 if (TARGET_STFIWX)
2909 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2910
2911 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2912 if (TARGET_DIRECT_MOVE_128)
2913 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2914
2915 /* Set up the reload helper and direct move functions. */
2916 if (TARGET_VSX || TARGET_ALTIVEC)
2917 {
2918 if (TARGET_64BIT)
2919 {
2920 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2921 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2922 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2923 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2924 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2925 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2926 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2927 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2928 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2929 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2930 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2931 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2932 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2933 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2934 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2935 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2936 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2937 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2938 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2939 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2940
2941 if (FLOAT128_VECTOR_P (KFmode))
2942 {
2943 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2944 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2945 }
2946
2947 if (FLOAT128_VECTOR_P (TFmode))
2948 {
2949 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2950 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2951 }
2952
2953 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2954 available. */
2955 if (TARGET_NO_SDMODE_STACK)
2956 {
2957 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2958 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2959 }
2960
2961 if (TARGET_VSX)
2962 {
2963 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2964 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2965 }
2966
2967 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2968 {
2969 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2970 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2971 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2972 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2973 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2974 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2975 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2976 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2977 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2978
2979 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2980 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2981 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2982 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2983 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2984 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2985 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2986 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2987 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2988
2989 if (FLOAT128_VECTOR_P (KFmode))
2990 {
2991 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
2992 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
2993 }
2994
2995 if (FLOAT128_VECTOR_P (TFmode))
2996 {
2997 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
2998 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
2999 }
3000 }
3001 }
3002 else
3003 {
3004 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3005 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3006 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3007 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3008 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3009 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3010 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3011 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3012 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3013 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3014 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3015 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3016 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3017 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3018 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3019 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3020 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3021 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3022 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3023 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3024
3025 if (FLOAT128_VECTOR_P (KFmode))
3026 {
3027 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3028 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3029 }
3030
3031 if (FLOAT128_IEEE_P (TFmode))
3032 {
3033 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3034 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3035 }
3036
3037 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3038 available. */
3039 if (TARGET_NO_SDMODE_STACK)
3040 {
3041 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3042 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3043 }
3044
3045 if (TARGET_VSX)
3046 {
3047 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3048 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3049 }
3050
3051 if (TARGET_DIRECT_MOVE)
3052 {
3053 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3054 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3055 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3056 }
3057 }
3058
3059 reg_addr[DFmode].scalar_in_vmx_p = true;
3060 reg_addr[DImode].scalar_in_vmx_p = true;
3061
3062 if (TARGET_P8_VECTOR)
3063 {
3064 reg_addr[SFmode].scalar_in_vmx_p = true;
3065 reg_addr[SImode].scalar_in_vmx_p = true;
3066
3067 if (TARGET_P9_VECTOR)
3068 {
3069 reg_addr[HImode].scalar_in_vmx_p = true;
3070 reg_addr[QImode].scalar_in_vmx_p = true;
3071 }
3072 }
3073 }
3074
3075 /* Precalculate HARD_REGNO_NREGS. */
3076 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3077 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3078 rs6000_hard_regno_nregs[m][r]
3079 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3080
3081 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3082 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3083 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3084 rs6000_hard_regno_mode_ok_p[m][r]
3085 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3086
3087 /* Precalculate CLASS_MAX_NREGS sizes. */
3088 for (c = 0; c < LIM_REG_CLASSES; ++c)
3089 {
3090 int reg_size;
3091
3092 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3093 reg_size = UNITS_PER_VSX_WORD;
3094
3095 else if (c == ALTIVEC_REGS)
3096 reg_size = UNITS_PER_ALTIVEC_WORD;
3097
3098 else if (c == FLOAT_REGS)
3099 reg_size = UNITS_PER_FP_WORD;
3100
3101 else
3102 reg_size = UNITS_PER_WORD;
3103
3104 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3105 {
3106 machine_mode m2 = (machine_mode)m;
3107 int reg_size2 = reg_size;
3108
3109 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3110 in VSX. */
3111 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3112 reg_size2 = UNITS_PER_FP_WORD;
3113
3114 rs6000_class_max_nregs[m][c]
3115 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3116 }
3117 }
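
/* For instance, with the sizes above, IFmode (IBM 128-bit floating point,
   16 bytes, FLOAT128_2REG_P) in a VSX register class uses reg_size2 = 8 and
   so needs (16 + 8 - 1) / 8 = 2 registers, while V2DFmode in the same class
   needs only (16 + 16 - 1) / 16 = 1. */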
3118
3119 /* Calculate which modes to automatically generate code to use the
3120 reciprocal divide and square root instructions. In the future, possibly
3121 automatically generate the instructions even if the user did not specify
3122 -mrecip. The older machines' double-precision reciprocal square root
3123 estimate is not accurate enough. */
3124 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3125 if (TARGET_FRES)
3126 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3127 if (TARGET_FRE)
3128 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3129 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3130 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3131 if (VECTOR_UNIT_VSX_P (V2DFmode))
3132 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3133
3134 if (TARGET_FRSQRTES)
3135 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3136 if (TARGET_FRSQRTE)
3137 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3138 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3139 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3140 if (VECTOR_UNIT_VSX_P (V2DFmode))
3141 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3142
3143 if (rs6000_recip_control)
3144 {
3145 if (!flag_finite_math_only)
3146 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3147 "-ffast-math");
3148 if (flag_trapping_math)
3149 warning (0, "%qs requires %qs or %qs", "-mrecip",
3150 "-fno-trapping-math", "-ffast-math");
3151 if (!flag_reciprocal_math)
3152 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3153 "-ffast-math");
3154 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3155 {
3156 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3157 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3158 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3159
3160 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3161 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3162 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3163
3164 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3165 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3166 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3167
3168 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3169 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3170 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3171
3172 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3173 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3174 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3175
3176 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3177 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3178 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3179
3180 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3181 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3182 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3183
3184 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3185 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3186 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3187 }
3188 }
3189
3190 /* Update the addr mask bits in reg_addr to help secondary reload and the
3191 legitimate address support figure out the appropriate addressing to
3192 use. */
3193 rs6000_setup_reg_addr_masks ();
3194
3195 if (global_init_p || TARGET_DEBUG_TARGET)
3196 {
3197 if (TARGET_DEBUG_REG)
3198 rs6000_debug_reg_global ();
3199
3200 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3201 fprintf (stderr,
3202 "SImode variable mult cost = %d\n"
3203 "SImode constant mult cost = %d\n"
3204 "SImode short constant mult cost = %d\n"
3205 "DImode multipliciation cost = %d\n"
3206 "SImode division cost = %d\n"
3207 "DImode division cost = %d\n"
3208 "Simple fp operation cost = %d\n"
3209 "DFmode multiplication cost = %d\n"
3210 "SFmode division cost = %d\n"
3211 "DFmode division cost = %d\n"
3212 "cache line size = %d\n"
3213 "l1 cache size = %d\n"
3214 "l2 cache size = %d\n"
3215 "simultaneous prefetches = %d\n"
3216 "\n",
3217 rs6000_cost->mulsi,
3218 rs6000_cost->mulsi_const,
3219 rs6000_cost->mulsi_const9,
3220 rs6000_cost->muldi,
3221 rs6000_cost->divsi,
3222 rs6000_cost->divdi,
3223 rs6000_cost->fp,
3224 rs6000_cost->dmul,
3225 rs6000_cost->sdiv,
3226 rs6000_cost->ddiv,
3227 rs6000_cost->cache_line_size,
3228 rs6000_cost->l1_cache_size,
3229 rs6000_cost->l2_cache_size,
3230 rs6000_cost->simultaneous_prefetches);
3231 }
3232 }
3233
3234 #if TARGET_MACHO
3235 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3236
3237 static void
3238 darwin_rs6000_override_options (void)
3239 {
3240 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3241 off. */
3242 rs6000_altivec_abi = 1;
3243 TARGET_ALTIVEC_VRSAVE = 1;
3244 rs6000_current_abi = ABI_DARWIN;
3245
3246 if (DEFAULT_ABI == ABI_DARWIN
3247 && TARGET_64BIT)
3248 darwin_one_byte_bool = 1;
3249
3250 if (TARGET_64BIT && ! TARGET_POWERPC64)
3251 {
3252 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3253 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3254 }
3255
3256 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3257 optimisation, and it will not work with the most generic case (where the
3258 symbol is undefined external, but there is no symbol stub). */
3259 if (TARGET_64BIT)
3260 rs6000_default_long_calls = 0;
3261
3262 /* ld_classic is (so far) still used for kernel (static) code, and supports
3263 the JBSR longcall / branch islands. */
3264 if (flag_mkernel)
3265 {
3266 rs6000_default_long_calls = 1;
3267
3268 /* Allow a kext author to do -mkernel -mhard-float. */
3269 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3270 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3271 }
3272
3273 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3274 Altivec. */
3275 if (!flag_mkernel && !flag_apple_kext
3276 && TARGET_64BIT
3277 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3278 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3279
3280 /* Unless the user (not the configurer) has explicitly overridden
3281 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3282 G4 unless targeting the kernel. */
3283 if (!flag_mkernel
3284 && !flag_apple_kext
3285 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3286 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3287 && ! global_options_set.x_rs6000_cpu_index)
3288 {
3289 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3290 }
3291 }
3292 #endif
3293
3294 /* If not otherwise specified by a target, make 'long double' equivalent to
3295 'double'. */
3296
3297 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3298 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3299 #endif
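/* For example (illustrative, not a definition from this file), a subtarget
   header that wants 128-bit long double by default would instead define:
   #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 128  */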
3300
3301 /* Return the builtin mask of the various options used that could affect which
3302 builtins are enabled. In the past we used target_flags, but we've run out of
3303 bits, and some options are no longer in target_flags. */
3304
3305 HOST_WIDE_INT
3306 rs6000_builtin_mask_calculate (void)
3307 {
3308 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3309 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3310 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3311 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3312 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3313 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3314 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3315 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3316 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3317 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3318 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3319 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3320 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3321 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3322 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3323 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3324 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3325 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3326 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3327 | ((TARGET_LONG_DOUBLE_128
3328 && TARGET_HARD_FLOAT
3329 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3330 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3331 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3332 }
3333
3334 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3335 to clobber the XER[CA] bit because clobbering that bit without telling
3336 the compiler worked just fine with versions of GCC before GCC 5, and
3337 breaking a lot of older code in ways that are hard to track down is
3338 not such a great idea. */
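/* For example (illustrative), code written against pre-GCC-5 compilers could
   contain
     asm ("addic %0,%1,1" : "=r" (x) : "r" (y));
   where addic sets XER[CA] without the asm declaring a clobber; the
   unconditional clobber below keeps such code correct.  */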
3339
3340 static rtx_insn *
3341 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3342 vec<const char *> &/*constraints*/,
3343 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3344 {
3345 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3346 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3347 return NULL;
3348 }
3349
3350 /* Override command line options.
3351
3352 Combine build-specific configuration information with options
3353 specified on the command line to set various state variables which
3354 influence code generation, optimization, and expansion of built-in
3355 functions. Assure that command-line configuration preferences are
3356 compatible with each other and with the build configuration; issue
3357 warnings while adjusting configuration or error messages while
3358 rejecting configuration.
3359
3360 Upon entry to this function:
3361
3362 This function is called once at the beginning of
3363 compilation, and then again at the start and end of compiling
3364 each section of code that has a different configuration, as
3365 indicated, for example, by adding the
3366
3367 __attribute__((__target__("cpu=power9")))
3368
3369 qualifier to a function definition or, for example, by bracketing
3370 code between
3371
3372 #pragma GCC target("altivec")
3373
3374 and
3375
3376 #pragma GCC reset_options
3377
3378 directives. Parameter global_init_p is true for the initial
3379 invocation, which initializes global variables, and false for all
3380 subsequent invocations.
3381
3382
3383 Various global state information is assumed to be valid. This
3384 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3385 default CPU specified at build configure time, TARGET_DEFAULT,
3386 representing the default set of option flags for the default
3387 target, and global_options_set.x_rs6000_isa_flags, representing
3388 which options were requested on the command line.
3389
3390 Upon return from this function:
3391
3392 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3393 was set by name on the command line. Additionally, if certain
3394 attributes are automatically enabled or disabled by this function
3395 in order to assure compatibility between options and
3396 configuration, the flags associated with those attributes are
3397 also set. By setting these "explicit bits", we avoid the risk
3398 that other code might accidentally overwrite these particular
3399 attributes with "default values".
3400
3401 The various bits of rs6000_isa_flags are set to indicate the
3402 target options that have been selected for the most current
3403 compilation efforts. This has the effect of also turning on the
3404 associated TARGET_XXX values since these are macros which are
3405 generally defined to test the corresponding bit of the
3406 rs6000_isa_flags variable.
3407
3408 The variable rs6000_builtin_mask is set to represent the target
3409 options for the most current compilation efforts, consistent with
3410 the current contents of rs6000_isa_flags. This variable controls
3411 expansion of built-in functions.
3412
3413 Various other global variables and fields of global structures
3414 (over 50 in all) are initialized to reflect the desired options
3415 for the most current compilation efforts. */
3416
3417 static bool
3418 rs6000_option_override_internal (bool global_init_p)
3419 {
3420 bool ret = true;
3421
3422 HOST_WIDE_INT set_masks;
3423 HOST_WIDE_INT ignore_masks;
3424 int cpu_index = -1;
3425 int tune_index;
3426 struct cl_target_option *main_target_opt
3427 = ((global_init_p || target_option_default_node == NULL)
3428 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3429
3430 /* Print defaults. */
3431 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3432 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3433
3434 /* Remember the explicit arguments. */
3435 if (global_init_p)
3436 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3437
3438 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3439 library functions, so warn about it. The flag may be useful for
3440 performance studies from time to time though, so don't disable it
3441 entirely. */
3442 if (global_options_set.x_rs6000_alignment_flags
3443 && rs6000_alignment_flags == MASK_ALIGN_POWER
3444 && DEFAULT_ABI == ABI_DARWIN
3445 && TARGET_64BIT)
3446 warning (0, "%qs is not supported for 64-bit Darwin;"
3447 " it is incompatible with the installed C and C++ libraries",
3448 "-malign-power");
3449
3450 /* Numerous experiments show that IRA-based loop pressure
3451 calculation works better for RTL loop invariant motion on targets
3452 with enough (>= 32) registers. It is an expensive optimization,
3453 so it is enabled only when optimizing for peak performance. */
3454 if (optimize >= 3 && global_init_p
3455 && !global_options_set.x_flag_ira_loop_pressure)
3456 flag_ira_loop_pressure = 1;
3457
3458 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3459 for tracebacks to be complete, but not if -fasynchronous-unwind-tables was
3460 already specified either way on the command line. */
3461 if (flag_sanitize & SANITIZE_USER_ADDRESS
3462 && !global_options_set.x_flag_asynchronous_unwind_tables)
3463 flag_asynchronous_unwind_tables = 1;
3464
3465 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3466 loop unroller is active. It is only checked during unrolling, so
3467 we can just turn it on by default. */
3468 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3469 flag_variable_expansion_in_unroller = 1;
3470
3471 /* Set the pointer size. */
3472 if (TARGET_64BIT)
3473 {
3474 rs6000_pmode = DImode;
3475 rs6000_pointer_size = 64;
3476 }
3477 else
3478 {
3479 rs6000_pmode = SImode;
3480 rs6000_pointer_size = 32;
3481 }
3482
3483 /* Some OSs don't support saving the high part of 64-bit registers on context
3484 switch. Other OSs don't support saving Altivec registers. On those OSs,
3485 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3486 if the user wants either, the user must explicitly specify them and we
3487 won't interfere with the user's specification. */
3488
3489 set_masks = POWERPC_MASKS;
3490 #ifdef OS_MISSING_POWERPC64
3491 if (OS_MISSING_POWERPC64)
3492 set_masks &= ~OPTION_MASK_POWERPC64;
3493 #endif
3494 #ifdef OS_MISSING_ALTIVEC
3495 if (OS_MISSING_ALTIVEC)
3496 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3497 | OTHER_VSX_VECTOR_MASKS);
3498 #endif
3499
3500 /* Don't override by the processor default if given explicitly. */
3501 set_masks &= ~rs6000_isa_flags_explicit;
3502
3503 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3504 the cpu in a target attribute or pragma, but did not specify a tuning
3505 option, use the cpu for the tuning option rather than the option specified
3506 with -mtune on the command line. Process a '--with-cpu' configuration
3507 request as an implicit -mcpu. */
3508 if (rs6000_cpu_index >= 0)
3509 cpu_index = rs6000_cpu_index;
3510 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3511 cpu_index = main_target_opt->x_rs6000_cpu_index;
3512 else if (OPTION_TARGET_CPU_DEFAULT)
3513 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3514
3515 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3516 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3517 with those from the cpu, except for options that were explicitly set. If
3518 we don't have a cpu, do not override the target bits set in
3519 TARGET_DEFAULT. */
3520 if (cpu_index >= 0)
3521 {
3522 rs6000_cpu_index = cpu_index;
3523 rs6000_isa_flags &= ~set_masks;
3524 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3525 & set_masks);
3526 }
3527 else
3528 {
3529 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3530 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3531 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3532 to using rs6000_isa_flags, we need to do the initialization here.
3533
3534 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3535 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3536 HOST_WIDE_INT flags;
3537 if (TARGET_DEFAULT)
3538 flags = TARGET_DEFAULT;
3539 else
3540 {
3541 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3542 const char *default_cpu = (!TARGET_POWERPC64
3543 ? "powerpc"
3544 : (BYTES_BIG_ENDIAN
3545 ? "powerpc64"
3546 : "powerpc64le"));
3547 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3548 flags = processor_target_table[default_cpu_index].target_enable;
3549 }
3550 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3551 }
3552
3553 if (rs6000_tune_index >= 0)
3554 tune_index = rs6000_tune_index;
3555 else if (cpu_index >= 0)
3556 rs6000_tune_index = tune_index = cpu_index;
3557 else
3558 {
3559 size_t i;
3560 enum processor_type tune_proc
3561 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3562
3563 tune_index = -1;
3564 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3565 if (processor_target_table[i].processor == tune_proc)
3566 {
3567 tune_index = i;
3568 break;
3569 }
3570 }
3571
3572 if (cpu_index >= 0)
3573 rs6000_cpu = processor_target_table[cpu_index].processor;
3574 else
3575 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3576
3577 gcc_assert (tune_index >= 0);
3578 rs6000_tune = processor_target_table[tune_index].processor;
3579
3580 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3581 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3582 || rs6000_cpu == PROCESSOR_PPCE5500)
3583 {
3584 if (TARGET_ALTIVEC)
3585 error ("AltiVec not supported in this target");
3586 }
3587
3588 /* If we are optimizing big endian systems for space, use the load/store
3589 multiple instructions. */
3590 if (BYTES_BIG_ENDIAN && optimize_size)
3591 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3592
3593 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3594 because the hardware doesn't support the instructions used in little
3595 endian mode, and they cause an alignment trap. The 750 does not cause an
3596 alignment trap (except when the target address is unaligned). */
3597
3598 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3599 {
3600 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3601 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3602 warning (0, "%qs is not supported on little endian systems",
3603 "-mmultiple");
3604 }
3605
3606 /* If little-endian, default to -mstrict-align on older processors.
3607 Testing for htm matches power8 and later. */
3608 if (!BYTES_BIG_ENDIAN
3609 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3610 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3611
3612 if (!rs6000_fold_gimple)
3613 fprintf (stderr,
3614 "gimple folding of rs6000 builtins has been disabled.\n");
3615
3616 /* Add some warnings for VSX. */
3617 if (TARGET_VSX)
3618 {
3619 const char *msg = NULL;
3620 if (!TARGET_HARD_FLOAT)
3621 {
3622 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3623 msg = N_("%<-mvsx%> requires hardware floating point");
3624 else
3625 {
3626 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3627 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3628 }
3629 }
3630 else if (TARGET_AVOID_XFORM > 0)
3631 msg = N_("%<-mvsx%> needs indexed addressing");
3632 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3633 & OPTION_MASK_ALTIVEC))
3634 {
3635 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3636 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3637 else
3638 msg = N_("%<-mno-altivec%> disables vsx");
3639 }
3640
3641 if (msg)
3642 {
3643 warning (0, msg);
3644 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3645 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3646 }
3647 }
3648
3649 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3650 the -mcpu setting to enable options that conflict. */
3651 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3652 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3653 | OPTION_MASK_ALTIVEC
3654 | OPTION_MASK_VSX)) != 0)
3655 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3656 | OPTION_MASK_DIRECT_MOVE)
3657 & ~rs6000_isa_flags_explicit);
3658
3659 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3660 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3661
3662 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3663 off all of the options that depend on those flags. */
3664 ignore_masks = rs6000_disable_incompatible_switches ();
3665
3666 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3667 unless the user explicitly used the -mno-<option> to disable the code. */
3668 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3669 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3670 else if (TARGET_P9_MINMAX)
3671 {
3672 if (cpu_index >= 0)
3673 {
3674 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3675 {
3676 /* Legacy behavior: allow -mcpu=power9 with certain
3677 capabilities explicitly disabled. */
3678 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3679 }
3680 else
3681 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3682 "for <xxx> less than power9", "-mcpu");
3683 }
3684 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3685 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3686 & rs6000_isa_flags_explicit))
3687 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3688 were explicitly cleared. */
3689 error ("%qs incompatible with explicitly disabled options",
3690 "-mpower9-minmax");
3691 else
3692 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3693 }
3694 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3695 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3696 else if (TARGET_VSX)
3697 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3698 else if (TARGET_POPCNTD)
3699 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3700 else if (TARGET_DFP)
3701 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3702 else if (TARGET_CMPB)
3703 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3704 else if (TARGET_FPRND)
3705 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3706 else if (TARGET_POPCNTB)
3707 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3708 else if (TARGET_ALTIVEC)
3709 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3710
3711 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3712 {
3713 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3714 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3715 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3716 }
3717
3718 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3719 {
3720 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3721 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3722 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3723 }
3724
3725 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3726 {
3727 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3728 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3729 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3730 }
3731
3732 if (TARGET_P8_VECTOR && !TARGET_VSX)
3733 {
3734 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3735 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3736 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3737 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3738 {
3739 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3740 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3741 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3742 }
3743 else
3744 {
3745 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3746 not explicit. */
3747 rs6000_isa_flags |= OPTION_MASK_VSX;
3748 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3749 }
3750 }
3751
3752 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3753 {
3754 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3755 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3756 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3757 }
3758
3759 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3760 silently turn off quad memory mode. */
3761 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3762 {
3763 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3764 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3765
3766 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3767 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3768
3769 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3770 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3771 }
3772
3773 /* Non-atomic quad memory load/store are disabled for little endian, since
3774 the words are reversed, but atomic operations can still be done by
3775 swapping the words. */
3776 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3777 {
3778 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3779 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3780 "mode"));
3781
3782 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3783 }
3784
3785 /* Assume that if the user asked for normal quad memory instructions, they
3786 want the atomic versions as well, unless they explicitly told us not to
3787 use quad word atomic instructions. */
3788 if (TARGET_QUAD_MEMORY
3789 && !TARGET_QUAD_MEMORY_ATOMIC
3790 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3791 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3792
3793 /* If we can shrink-wrap the TOC register save separately, then use
3794 -msave-toc-indirect unless explicitly disabled. */
3795 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3796 && flag_shrink_wrap_separate
3797 && optimize_function_for_speed_p (cfun))
3798 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3799
3800 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3801 generating power8 instructions. Power9 does not optimize power8 fusion
3802 cases. */
3803 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3804 {
3805 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3806 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3807 else
3808 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3809 }
3810
3811 /* Setting additional fusion flags turns on base fusion. */
3812 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3813 {
3814 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3815 {
3816 if (TARGET_P8_FUSION_SIGN)
3817 error ("%qs requires %qs", "-mpower8-fusion-sign",
3818 "-mpower8-fusion");
3819
3820 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3821 }
3822 else
3823 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3824 }
3825
3826 /* Power8 does not fuse sign extended loads with the addis. If we are
3827 optimizing at high levels for speed, convert a sign extended load into a
3828 zero extending load, and an explicit sign extension. */
3829 if (TARGET_P8_FUSION
3830 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3831 && optimize_function_for_speed_p (cfun)
3832 && optimize >= 3)
3833 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3834
3835 /* ISA 3.0 vector instructions include ISA 2.07. */
3836 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3837 {
3838 /* We prefer to not mention undocumented options in
3839 error messages. However, if users have managed to select
3840 power9-vector without selecting power8-vector, they
3841 already know about undocumented flags. */
3842 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
3843 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3844 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3845 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3846 {
3847 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3848 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3849 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3850 }
3851 else
3852 {
3853 /* OPTION_MASK_P9_VECTOR is explicit and
3854 OPTION_MASK_P8_VECTOR is not explicit. */
3855 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3856 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3857 }
3858 }
3859
3860 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
3861 support. If we only have ISA 2.06 support, and the user did not specify
3862 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3863 but we don't enable the full vectorization support. */
3864 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3865 TARGET_ALLOW_MOVMISALIGN = 1;
3866
3867 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3868 {
3869 if (TARGET_ALLOW_MOVMISALIGN > 0
3870 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3871 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3872
3873 TARGET_ALLOW_MOVMISALIGN = 0;
3874 }
3875
3876 /* Determine when unaligned vector accesses are permitted, and when
3877 they are preferred over masked Altivec loads. Note that if
3878 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3879 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3880 not true. */
3881 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3882 {
3883 if (!TARGET_VSX)
3884 {
3885 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3886 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3887
3888 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3889 }
3890
3891 else if (!TARGET_ALLOW_MOVMISALIGN)
3892 {
3893 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3894 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
3895 "-mallow-movmisalign");
3896
3897 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3898 }
3899 }
3900
3901 /* Use long double size to select the appropriate long double. We use
3902 TYPE_PRECISION to differentiate the 3 different long double types. We map
3903 128 into the precision used for TFmode. */
3904 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3905 ? 64
3906 : FLOAT_PRECISION_TFmode);
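/* Illustration (the specific numbers are assumed, not defined here): IBM
   extended double keeps TYPE_PRECISION 106 and IEEE 128-bit keeps 128,
   while FLOAT_PRECISION_TFmode is a distinct third value so the three
   long double types never compare equal.  */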
3907
3908 /* Set long double size before the IEEE 128-bit tests. */
3909 if (!global_options_set.x_rs6000_long_double_type_size)
3910 {
3911 if (main_target_opt != NULL
3912 && (main_target_opt->x_rs6000_long_double_type_size
3913 != default_long_double_size))
3914 error ("target attribute or pragma changes %<long double%> size");
3915 else
3916 rs6000_long_double_type_size = default_long_double_size;
3917 }
3918 else if (rs6000_long_double_type_size == 128)
3919 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3920 else if (global_options_set.x_rs6000_ieeequad)
3921 {
3922 if (global_options.x_rs6000_ieeequad)
3923 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3924 else
3925 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3926 }
3927
3928 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3929 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3930 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3931 those systems will not pick up this default. Warn if the user changes the
3932 default unless -Wno-psabi. */
3933 if (!global_options_set.x_rs6000_ieeequad)
3934 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3935
3936 else
3937 {
3938 if (global_options.x_rs6000_ieeequad
3939 && (!TARGET_POPCNTD || !TARGET_VSX))
3940 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3941
3942 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3943 {
3944 static bool warned_change_long_double;
3945 if (!warned_change_long_double)
3946 {
3947 warned_change_long_double = true;
3948 if (TARGET_IEEEQUAD)
3949 warning (OPT_Wpsabi, "Using IEEE extended precision "
3950 "%<long double%>");
3951 else
3952 warning (OPT_Wpsabi, "Using IBM extended precision "
3953 "%<long double%>");
3954 }
3955 }
3956 }
3957
3958 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3959 systems. In GCC 7, we would enable the IEEE 128-bit floating point
3960 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3961 unless the user used the explicit -mfloat128. In GCC 8, we enable both
3962 the keyword and the type. */
3963 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3964
3965 /* IEEE 128-bit floating point requires VSX support. */
3966 if (TARGET_FLOAT128_KEYWORD)
3967 {
3968 if (!TARGET_VSX)
3969 {
3970 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3971 error ("%qs requires VSX support", "%<-mfloat128%>");
3972
3973 TARGET_FLOAT128_TYPE = 0;
3974 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3975 | OPTION_MASK_FLOAT128_HW);
3976 }
3977 else if (!TARGET_FLOAT128_TYPE)
3978 {
3979 TARGET_FLOAT128_TYPE = 1;
3980 warning (0, "the %<-mfloat128%> option may not be fully supported");
3981 }
3982 }
3983
3984 /* Enable the __float128 keyword under Linux by default. */
3985 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
3986 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
3987 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
3988
3989 /* If we are supporting the float128 type and have full ISA 3.0 support,
3990 enable -mfloat128-hardware by default. However, don't enable the
3991 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
3992 because sometimes the compiler wants to put things in an integer
3993 container, and if we don't have __int128 support, it is impossible. */
3994 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
3995 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
3996 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
3997 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
3998
3999 if (TARGET_FLOAT128_HW
4000 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4001 {
4002 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4003 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4004
4005 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4006 }
4007
4008 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4009 {
4010 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4011 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4012
4013 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4014 }
4015
4016 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4017 if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
4018 {
4019 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4020 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4021 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
4022 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4023
4024 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
4025 }
4026
4027 /* -mpcrel requires prefixed load/store addressing. */
4028 if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
4029 {
4030 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4031 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4032
4033 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4034 }
4035
4036 /* Print the options after updating the defaults. */
4037 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4038 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4039
4040 /* E500mc does "better" if we inline more aggressively. Respect the
4041 user's opinion, though. */
4042 if (rs6000_block_move_inline_limit == 0
4043 && (rs6000_tune == PROCESSOR_PPCE500MC
4044 || rs6000_tune == PROCESSOR_PPCE500MC64
4045 || rs6000_tune == PROCESSOR_PPCE5500
4046 || rs6000_tune == PROCESSOR_PPCE6500))
4047 rs6000_block_move_inline_limit = 128;
4048
4049 /* store_one_arg depends on expand_block_move to handle at least the
4050 size of reg_parm_stack_space. */
4051 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4052 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4053
4054 if (global_init_p)
4055 {
4056 /* If the appropriate debug option is enabled, replace the target hooks
4057 with debug versions that call the real version and then print
4058 debugging information. */
4059 if (TARGET_DEBUG_COST)
4060 {
4061 targetm.rtx_costs = rs6000_debug_rtx_costs;
4062 targetm.address_cost = rs6000_debug_address_cost;
4063 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4064 }
4065
4066 if (TARGET_DEBUG_ADDR)
4067 {
4068 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4069 targetm.legitimize_address = rs6000_debug_legitimize_address;
4070 rs6000_secondary_reload_class_ptr
4071 = rs6000_debug_secondary_reload_class;
4072 targetm.secondary_memory_needed
4073 = rs6000_debug_secondary_memory_needed;
4074 targetm.can_change_mode_class
4075 = rs6000_debug_can_change_mode_class;
4076 rs6000_preferred_reload_class_ptr
4077 = rs6000_debug_preferred_reload_class;
4078 rs6000_mode_dependent_address_ptr
4079 = rs6000_debug_mode_dependent_address;
4080 }
4081
4082 if (rs6000_veclibabi_name)
4083 {
4084 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4085 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4086 else
4087 {
4088 error ("unknown vectorization library ABI type (%qs) for "
4089 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4090 ret = false;
4091 }
4092 }
4093 }
4094
4095 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4096 target attribute or pragma which automatically enables both options,
4097 unless the altivec ABI was set. This is set by default for 64-bit, but
4098 not for 32-bit. */
4099 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4100 {
4101 TARGET_FLOAT128_TYPE = 0;
4102 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4103 | OPTION_MASK_FLOAT128_KEYWORD)
4104 & ~rs6000_isa_flags_explicit);
4105 }
4106
4107 /* Enable Altivec ABI for AIX -maltivec. */
4108 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4109 {
4110 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4111 error ("target attribute or pragma changes AltiVec ABI");
4112 else
4113 rs6000_altivec_abi = 1;
4114 }
4115
4116 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4117 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4118 be explicitly overridden in either case. */
4119 if (TARGET_ELF)
4120 {
4121 if (!global_options_set.x_rs6000_altivec_abi
4122 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4123 {
4124 if (main_target_opt != NULL
4125 && !main_target_opt->x_rs6000_altivec_abi)
4126 error ("target attribute or pragma changes AltiVec ABI");
4127 else
4128 rs6000_altivec_abi = 1;
4129 }
4130 }
4131
4132 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4133 So far, the only darwin64 targets are also Mach-O. */
4134 if (TARGET_MACHO
4135 && DEFAULT_ABI == ABI_DARWIN
4136 && TARGET_64BIT)
4137 {
4138 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4139 error ("target attribute or pragma changes darwin64 ABI");
4140 else
4141 {
4142 rs6000_darwin64_abi = 1;
4143 /* Default to natural alignment, for better performance. */
4144 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4145 }
4146 }
4147
4148 /* Place FP constants in the constant pool instead of the TOC
4149 if section anchors are enabled. */
4150 if (flag_section_anchors
4151 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4152 TARGET_NO_FP_IN_TOC = 1;
4153
4154 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4155 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4156
4157 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4158 SUBTARGET_OVERRIDE_OPTIONS;
4159 #endif
4160 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4161 SUBSUBTARGET_OVERRIDE_OPTIONS;
4162 #endif
4163 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4164 SUB3TARGET_OVERRIDE_OPTIONS;
4165 #endif
4166
4167 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4168 after the subtarget override options are done. */
4169 if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4170 {
4171 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4172 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4173
4174 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4175 }
4176
4177 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4178 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4179
4180 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4181 && rs6000_tune != PROCESSOR_POWER5
4182 && rs6000_tune != PROCESSOR_POWER6
4183 && rs6000_tune != PROCESSOR_POWER7
4184 && rs6000_tune != PROCESSOR_POWER8
4185 && rs6000_tune != PROCESSOR_POWER9
4186 && rs6000_tune != PROCESSOR_FUTURE
4187 && rs6000_tune != PROCESSOR_PPCA2
4188 && rs6000_tune != PROCESSOR_CELL
4189 && rs6000_tune != PROCESSOR_PPC476);
4190 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4191 || rs6000_tune == PROCESSOR_POWER5
4192 || rs6000_tune == PROCESSOR_POWER7
4193 || rs6000_tune == PROCESSOR_POWER8);
4194 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4195 || rs6000_tune == PROCESSOR_POWER5
4196 || rs6000_tune == PROCESSOR_POWER6
4197 || rs6000_tune == PROCESSOR_POWER7
4198 || rs6000_tune == PROCESSOR_POWER8
4199 || rs6000_tune == PROCESSOR_POWER9
4200 || rs6000_tune == PROCESSOR_FUTURE
4201 || rs6000_tune == PROCESSOR_PPCE500MC
4202 || rs6000_tune == PROCESSOR_PPCE500MC64
4203 || rs6000_tune == PROCESSOR_PPCE5500
4204 || rs6000_tune == PROCESSOR_PPCE6500);
4205
4206 /* Allow debug switches to override the above settings. These are set to -1
4207 in rs6000.opt to indicate the user hasn't directly set the switch. */
4208 if (TARGET_ALWAYS_HINT >= 0)
4209 rs6000_always_hint = TARGET_ALWAYS_HINT;
4210
4211 if (TARGET_SCHED_GROUPS >= 0)
4212 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4213
4214 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4215 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4216
4217 rs6000_sched_restricted_insns_priority
4218 = (rs6000_sched_groups ? 1 : 0);
4219
4220 /* Handle -msched-costly-dep option. */
4221 rs6000_sched_costly_dep
4222 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4223
4224 if (rs6000_sched_costly_dep_str)
4225 {
4226 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4227 rs6000_sched_costly_dep = no_dep_costly;
4228 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4229 rs6000_sched_costly_dep = all_deps_costly;
4230 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4231 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4232 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4233 rs6000_sched_costly_dep = store_to_load_dep_costly;
4234 else
4235 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4236 atoi (rs6000_sched_costly_dep_str));
4237 }
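/* For example (illustrative), -msched-costly-dep=20 is handled by the atoi
   fallback above and treats any dependence whose cost is 20 or more as
   costly.  */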
4238
4239 /* Handle -minsert-sched-nops option. */
4240 rs6000_sched_insert_nops
4241 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4242
4243 if (rs6000_sched_insert_nops_str)
4244 {
4245 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4246 rs6000_sched_insert_nops = sched_finish_none;
4247 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4248 rs6000_sched_insert_nops = sched_finish_pad_groups;
4249 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4250 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4251 else
4252 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4253 atoi (rs6000_sched_insert_nops_str));
4254 }
4255
4256 /* Handle stack protector */
4257 if (!global_options_set.x_rs6000_stack_protector_guard)
4258 #ifdef TARGET_THREAD_SSP_OFFSET
4259 rs6000_stack_protector_guard = SSP_TLS;
4260 #else
4261 rs6000_stack_protector_guard = SSP_GLOBAL;
4262 #endif
4263
4264 #ifdef TARGET_THREAD_SSP_OFFSET
4265 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4266 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4267 #endif
4268
4269 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4270 {
4271 char *endp;
4272 const char *str = rs6000_stack_protector_guard_offset_str;
4273
4274 errno = 0;
4275 long offset = strtol (str, &endp, 0);
4276 if (!*str || *endp || errno)
4277 error ("%qs is not a valid number in %qs", str,
4278 "-mstack-protector-guard-offset=");
4279
4280 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4281 || (TARGET_64BIT && (offset & 3)))
4282 error ("%qs is not a valid offset in %qs", str,
4283 "-mstack-protector-guard-offset=");
4284
4285 rs6000_stack_protector_guard_offset = offset;
4286 }
4287
4288 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4289 {
4290 const char *str = rs6000_stack_protector_guard_reg_str;
4291 int reg = decode_reg_name (str);
4292
4293 if (!IN_RANGE (reg, 1, 31))
4294 error ("%qs is not a valid base register in %qs", str,
4295 "-mstack-protector-guard-reg=");
4296
4297 rs6000_stack_protector_guard_reg = reg;
4298 }
4299
4300 if (rs6000_stack_protector_guard == SSP_TLS
4301 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4302 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
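/* Putting it together (the values here are hypothetical), a command line
   such as
     -mstack-protector-guard=tls -mstack-protector-guard-reg=r13
     -mstack-protector-guard-offset=0x7010
   loads the canary from 0x7010(r13) instead of from a global symbol.  */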
4303
4304 if (global_init_p)
4305 {
4306 #ifdef TARGET_REGNAMES
4307 /* If the user desires alternate register names, copy in the
4308 alternate names now. */
4309 if (TARGET_REGNAMES)
4310 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4311 #endif
4312
4313 /* Set aix_struct_return last, after the ABI is determined.
4314 If -maix-struct-return or -msvr4-struct-return was explicitly
4315 used, don't override with the ABI default. */
4316 if (!global_options_set.x_aix_struct_return)
4317 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4318
4319 #if 0
4320 /* IBM XL compiler defaults to unsigned bitfields. */
4321 if (TARGET_XL_COMPAT)
4322 flag_signed_bitfields = 0;
4323 #endif
4324
4325 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4326 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4327
4328 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4329
4330 /* We can only guarantee the availability of DI pseudo-ops when
4331 assembling for 64-bit targets. */
4332 if (!TARGET_64BIT)
4333 {
4334 targetm.asm_out.aligned_op.di = NULL;
4335 targetm.asm_out.unaligned_op.di = NULL;
4336 }
4337
4338
4339 /* Set branch target alignment, if not optimizing for size. */
4340 if (!optimize_size)
4341 {
4342 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4343 aligned 8-byte to avoid misprediction by the branch predictor. */
4344 if (rs6000_tune == PROCESSOR_TITAN
4345 || rs6000_tune == PROCESSOR_CELL)
4346 {
4347 if (flag_align_functions && !str_align_functions)
4348 str_align_functions = "8";
4349 if (flag_align_jumps && !str_align_jumps)
4350 str_align_jumps = "8";
4351 if (flag_align_loops && !str_align_loops)
4352 str_align_loops = "8";
4353 }
4354 if (rs6000_align_branch_targets)
4355 {
4356 if (flag_align_functions && !str_align_functions)
4357 str_align_functions = "16";
4358 if (flag_align_jumps && !str_align_jumps)
4359 str_align_jumps = "16";
4360 if (flag_align_loops && !str_align_loops)
4361 {
4362 can_override_loop_align = 1;
4363 str_align_loops = "16";
4364 }
4365 }
4366
4367 if (flag_align_jumps && !str_align_jumps)
4368 str_align_jumps = "16";
4369 if (flag_align_loops && !str_align_loops)
4370 str_align_loops = "16";
4371 }
4372
4373 /* Arrange to save and restore machine status around nested functions. */
4374 init_machine_status = rs6000_init_machine_status;
4375
4376 /* We should always be splitting complex arguments, but we can't break
4377 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4378 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4379 targetm.calls.split_complex_arg = NULL;
4380
4381 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4382 if (DEFAULT_ABI == ABI_AIX)
4383 targetm.calls.custom_function_descriptors = 0;
4384 }
4385
4386 /* Initialize rs6000_cost with the appropriate target costs. */
4387 if (optimize_size)
4388 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4389 else
4390 switch (rs6000_tune)
4391 {
4392 case PROCESSOR_RS64A:
4393 rs6000_cost = &rs64a_cost;
4394 break;
4395
4396 case PROCESSOR_MPCCORE:
4397 rs6000_cost = &mpccore_cost;
4398 break;
4399
4400 case PROCESSOR_PPC403:
4401 rs6000_cost = &ppc403_cost;
4402 break;
4403
4404 case PROCESSOR_PPC405:
4405 rs6000_cost = &ppc405_cost;
4406 break;
4407
4408 case PROCESSOR_PPC440:
4409 rs6000_cost = &ppc440_cost;
4410 break;
4411
4412 case PROCESSOR_PPC476:
4413 rs6000_cost = &ppc476_cost;
4414 break;
4415
4416 case PROCESSOR_PPC601:
4417 rs6000_cost = &ppc601_cost;
4418 break;
4419
4420 case PROCESSOR_PPC603:
4421 rs6000_cost = &ppc603_cost;
4422 break;
4423
4424 case PROCESSOR_PPC604:
4425 rs6000_cost = &ppc604_cost;
4426 break;
4427
4428 case PROCESSOR_PPC604e:
4429 rs6000_cost = &ppc604e_cost;
4430 break;
4431
4432 case PROCESSOR_PPC620:
4433 rs6000_cost = &ppc620_cost;
4434 break;
4435
4436 case PROCESSOR_PPC630:
4437 rs6000_cost = &ppc630_cost;
4438 break;
4439
4440 case PROCESSOR_CELL:
4441 rs6000_cost = &ppccell_cost;
4442 break;
4443
4444 case PROCESSOR_PPC750:
4445 case PROCESSOR_PPC7400:
4446 rs6000_cost = &ppc750_cost;
4447 break;
4448
4449 case PROCESSOR_PPC7450:
4450 rs6000_cost = &ppc7450_cost;
4451 break;
4452
4453 case PROCESSOR_PPC8540:
4454 case PROCESSOR_PPC8548:
4455 rs6000_cost = &ppc8540_cost;
4456 break;
4457
4458 case PROCESSOR_PPCE300C2:
4459 case PROCESSOR_PPCE300C3:
4460 rs6000_cost = &ppce300c2c3_cost;
4461 break;
4462
4463 case PROCESSOR_PPCE500MC:
4464 rs6000_cost = &ppce500mc_cost;
4465 break;
4466
4467 case PROCESSOR_PPCE500MC64:
4468 rs6000_cost = &ppce500mc64_cost;
4469 break;
4470
4471 case PROCESSOR_PPCE5500:
4472 rs6000_cost = &ppce5500_cost;
4473 break;
4474
4475 case PROCESSOR_PPCE6500:
4476 rs6000_cost = &ppce6500_cost;
4477 break;
4478
4479 case PROCESSOR_TITAN:
4480 rs6000_cost = &titan_cost;
4481 break;
4482
4483 case PROCESSOR_POWER4:
4484 case PROCESSOR_POWER5:
4485 rs6000_cost = &power4_cost;
4486 break;
4487
4488 case PROCESSOR_POWER6:
4489 rs6000_cost = &power6_cost;
4490 break;
4491
4492 case PROCESSOR_POWER7:
4493 rs6000_cost = &power7_cost;
4494 break;
4495
4496 case PROCESSOR_POWER8:
4497 rs6000_cost = &power8_cost;
4498 break;
4499
4500 case PROCESSOR_POWER9:
4501 case PROCESSOR_FUTURE:
4502 rs6000_cost = &power9_cost;
4503 break;
4504
4505 case PROCESSOR_PPCA2:
4506 rs6000_cost = &ppca2_cost;
4507 break;
4508
4509 default:
4510 gcc_unreachable ();
4511 }
4512
4513 if (global_init_p)
4514 {
4515 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4516 param_simultaneous_prefetches,
4517 rs6000_cost->simultaneous_prefetches);
4518 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4519 param_l1_cache_size,
4520 rs6000_cost->l1_cache_size);
4521 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4522 param_l1_cache_line_size,
4523 rs6000_cost->cache_line_size);
4524 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4525 param_l2_cache_size,
4526 rs6000_cost->l2_cache_size);
4527
4528 /* Increase loop peeling limits based on performance analysis. */
4529 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4530 param_max_peeled_insns, 400);
4531 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4532 param_max_completely_peeled_insns, 400);
4533
4534 /* Use the 'model' -fsched-pressure algorithm by default. */
4535 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4536 param_sched_pressure_algorithm,
4537 SCHED_PRESSURE_MODEL);
4538
4539 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
4540 turns -fweb and -frename-registers on. */
4541 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
4542 || (global_options_set.x_flag_unroll_all_loops
4543 && flag_unroll_all_loops))
4544 {
4545 if (!global_options_set.x_unroll_only_small_loops)
4546 unroll_only_small_loops = 0;
4547 if (!global_options_set.x_flag_rename_registers)
4548 flag_rename_registers = 1;
4549 if (!global_options_set.x_flag_web)
4550 flag_web = 1;
4551 }
4552
4553 /* If using typedef char *va_list, signal that
4554 __builtin_va_start (&ap, 0) can be optimized to
4555 ap = __builtin_next_arg (0). */
4556 if (DEFAULT_ABI != ABI_V4)
4557 targetm.expand_builtin_va_start = NULL;
4558 }
4559
4560 /* If not explicitly specified via option, decide whether to generate indexed
4561 load/store instructions. A value of -1 indicates that the
4562 initial value of this variable has not been overwritten. During
4563 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4564 if (TARGET_AVOID_XFORM == -1)
4565 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4566 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4567 need indexed accesses and the type used is the scalar type of the element
4568 being loaded or stored. */
4569 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4570 && !TARGET_ALTIVEC);
4571
4572 /* Set the -mrecip options. */
4573 if (rs6000_recip_name)
4574 {
4575 char *p = ASTRDUP (rs6000_recip_name);
4576 char *q;
4577 unsigned int mask, i;
4578 bool invert;
4579
4580 while ((q = strtok (p, ",")) != NULL)
4581 {
4582 p = NULL;
4583 if (*q == '!')
4584 {
4585 invert = true;
4586 q++;
4587 }
4588 else
4589 invert = false;
4590
4591 if (!strcmp (q, "default"))
4592 mask = ((TARGET_RECIP_PRECISION)
4593 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4594 else
4595 {
4596 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4597 if (!strcmp (q, recip_options[i].string))
4598 {
4599 mask = recip_options[i].mask;
4600 break;
4601 }
4602
4603 if (i == ARRAY_SIZE (recip_options))
4604 {
4605 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4606 invert = false;
4607 mask = 0;
4608 ret = false;
4609 }
4610 }
4611
4612 if (invert)
4613 rs6000_recip_control &= ~mask;
4614 else
4615 rs6000_recip_control |= mask;
4616 }
4617 }
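/* For example (illustrative, assuming "rsqrtf" and "divd" appear in
   recip_options): -mrecip=rsqrtf,!divd enables the single-precision
   reciprocal square root estimate, while the "!" prefix clears the
   double-precision divide estimate.  */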
4618
4619 /* Set the builtin mask of the various options used that could affect which
4620 builtins were used. In the past we used target_flags, but we've run out
4621 of bits, and some options are no longer in target_flags. */
4622 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4623 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4624 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4625 rs6000_builtin_mask);
4626
4627 /* Initialize all of the registers. */
4628 rs6000_init_hard_regno_mode_ok (global_init_p);
4629
4630 /* Save the initial options in case the user uses function-specific options. */
4631 if (global_init_p)
4632 target_option_default_node = target_option_current_node
4633 = build_target_option_node (&global_options);
4634
4635 /* If not explicitly specified via option, decide whether to generate the
4636 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4637 if (TARGET_LINK_STACK == -1)
4638 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4639
4640 /* Deprecate use of -mno-speculate-indirect-jumps. */
4641 if (!rs6000_speculate_indirect_jumps)
4642 warning (0, "%qs is deprecated and not recommended in any circumstances",
4643 "-mno-speculate-indirect-jumps");
4644
4645 return ret;
4646 }
4647
4648 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4649 define the target cpu type. */
4650
4651 static void
4652 rs6000_option_override (void)
4653 {
4654 (void) rs6000_option_override_internal (true);
4655 }
4656
4657 \f
4658 /* Implement targetm.vectorize.builtin_mask_for_load. */
4659 static tree
4660 rs6000_builtin_mask_for_load (void)
4661 {
4662 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
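/* The mask produced by lvsl feeds a vperm that merges two aligned loads
   straddling the unaligned address (the classic Altivec realignment
   scheme); on targets with fast unaligned VSX loads that is wasted work,
   hence the guard below.  */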
4663 if ((TARGET_ALTIVEC && !TARGET_VSX)
4664 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4665 return altivec_builtin_mask_for_load;
4666 else
4667 return 0;
4668 }
4669
4670 /* Implement LOOP_ALIGN. */
4671 align_flags
4672 rs6000_loop_align (rtx label)
4673 {
4674 basic_block bb;
4675 int ninsns;
4676
4677 /* Don't override loop alignment if -falign-loops was specified. */
4678 if (!can_override_loop_align)
4679 return align_loops;
4680
4681 bb = BLOCK_FOR_INSN (label);
4682 ninsns = num_loop_insns (bb->loop_father);
4683
4684 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4685 if (ninsns > 4 && ninsns <= 8
4686 && (rs6000_tune == PROCESSOR_POWER4
4687 || rs6000_tune == PROCESSOR_POWER5
4688 || rs6000_tune == PROCESSOR_POWER6
4689 || rs6000_tune == PROCESSOR_POWER7
4690 || rs6000_tune == PROCESSOR_POWER8))
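/* align_flags (5) requests 1 << 5 == 32-byte alignment. */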
4691 return align_flags (5);
4692 else
4693 return align_loops;
4694 }
4695
4696 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4697 after applying N iterations. This routine does not determine
4698 how many iterations are required to reach the desired alignment. */
4699
4700 static bool
4701 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4702 {
4703 if (is_packed)
4704 return false;
4705
4706 if (TARGET_32BIT)
4707 {
4708 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4709 return true;
4710
4711 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4712 return true;
4713
4714 return false;
4715 }
4716 else
4717 {
4718 if (TARGET_MACHO)
4719 return false;
4720
4721 /* Assume that all other types are naturally aligned. CHECKME! */
4722 return true;
4723 }
4724 }
4725
4726 /* Return true if the vector misalignment factor is supported by the
4727 target. */
4728 static bool
4729 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4730 const_tree type,
4731 int misalignment,
4732 bool is_packed)
4733 {
4734 if (TARGET_VSX)
4735 {
4736 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4737 return true;
4738
4739 /* Return false if the movmisalign pattern is not supported for this mode. */
4740 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4741 return false;
4742
4743 if (misalignment == -1)
4744 {
4745 /* Misalignment factor is unknown at compile time but we know
4746 it's word aligned. */
4747 if (rs6000_vector_alignment_reachable (type, is_packed))
4748 {
4749 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4750
4751 if (element_size == 64 || element_size == 32)
4752 return true;
4753 }
4754
4755 return false;
4756 }
4757
4758 /* VSX supports word-aligned vectors. */
4759 if (misalignment % 4 == 0)
4760 return true;
4761 }
4762 return false;
4763 }
4764
4765 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4766 static int
4767 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4768 tree vectype, int misalign)
4769 {
4770 unsigned elements;
4771 tree elem_type;
4772
4773 switch (type_of_cost)
4774 {
4775 case scalar_stmt:
4776 case scalar_store:
4777 case vector_stmt:
4778 case vector_store:
4779 case vec_to_scalar:
4780 case scalar_to_vec:
4781 case cond_branch_not_taken:
4782 return 1;
4783 case scalar_load:
4784 case vector_load:
4785 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4786 return 2;
4787
4788 case vec_perm:
4789 /* Power7 has only one permute unit, make it a bit expensive. */
4790 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4791 return 3;
4792 else
4793 return 1;
4794
4795 case vec_promote_demote:
4796 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4797 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4798 return 4;
4799 else
4800 return 1;
4801
4802 case cond_branch_taken:
4803 return 3;
4804
4805 case unaligned_load:
4806 case vector_gather_load:
4807 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4808 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4809 return 2;
4810
4811 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4812 {
4813 elements = TYPE_VECTOR_SUBPARTS (vectype);
4814 if (elements == 2)
4815 /* Double word aligned. */
4816 return 4;
4817
4818 if (elements == 4)
4819 {
4820 switch (misalign)
4821 {
4822 case 8:
4823 /* Double word aligned. */
4824 return 4;
4825
4826 case -1:
4827 /* Unknown misalignment. */
4828 case 4:
4829 case 12:
4830 /* Word aligned. */
4831 return 33;
4832
4833 default:
4834 gcc_unreachable ();
4835 }
4836 }
4837 }
4838
4839 if (TARGET_ALTIVEC)
4840 /* Misaligned loads are not supported. */
4841 gcc_unreachable ();
4842
4843 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4844 return 4;
4845
4846 case unaligned_store:
4847 case vector_scatter_store:
4848 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4849 return 1;
4850
4851 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4852 {
4853 elements = TYPE_VECTOR_SUBPARTS (vectype);
4854 if (elements == 2)
4855 /* Double word aligned. */
4856 return 2;
4857
4858 if (elements == 4)
4859 {
4860 switch (misalign)
4861 {
4862 case 8:
4863 /* Double word aligned. */
4864 return 2;
4865
4866 case -1:
4867 /* Unknown misalignment. */
4868 case 4:
4869 case 12:
4870 /* Word aligned. */
4871 return 23;
4872
4873 default:
4874 gcc_unreachable ();
4875 }
4876 }
4877 }
4878
4879 if (TARGET_ALTIVEC)
4880 /* Misaligned stores are not supported. */
4881 gcc_unreachable ();
4882
4883 return 2;
4884
4885 case vec_construct:
4886 /* This is a rough approximation assuming non-constant elements
4887 constructed into a vector via element insertion. FIXME:
4888 vec_construct is not granular enough for uniformly good
4889 decisions. If the initialization is a splat, this is
4890 cheaper than we estimate. Improve this someday. */
4891 elem_type = TREE_TYPE (vectype);
4892 /* 32-bit vectors loaded into registers are stored as double
4893 precision, so we need 2 permutes, 2 converts, and 1 merge
4894 to construct a vector of short floats from them. */
4895 if (SCALAR_FLOAT_TYPE_P (elem_type)
4896 && TYPE_PRECISION (elem_type) == 32)
4897 return 5;
4898 /* On POWER9, integer vector types are built up in GPRs and then
4899 use a direct move (2 cycles). For POWER8 this is even worse,
4900 as we need two direct moves and a merge, and the direct moves
4901 are five cycles. */
4902 else if (INTEGRAL_TYPE_P (elem_type))
4903 {
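/* E.g., a 4-element integer vector costs 4 - 1 + 2 = 5 on POWER9
   but 4 - 1 + 5 = 8 on earlier VSX CPUs. */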
4904 if (TARGET_P9_VECTOR)
4905 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4906 else
4907 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4908 }
4909 else
4910 /* V2DFmode doesn't need a direct move. */
4911 return 2;
4912
4913 default:
4914 gcc_unreachable ();
4915 }
4916 }
4917
4918 /* Implement targetm.vectorize.preferred_simd_mode. */
4919
4920 static machine_mode
4921 rs6000_preferred_simd_mode (scalar_mode mode)
4922 {
4923 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
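/* E.g., SFmode is 4 bytes, so this asks for a 16 / 4 = 4 lane vector,
   i.e. V4SFmode when the Altivec/VSX vector units are enabled. */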
4924
4925 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
4926 return vmode.require ();
4927
4928 return word_mode;
4929 }
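
/* Illustrative sketch (not from the original source): for SFmode this asks
   for a 16-byte vector of 16/4 = 4 elements, i.e. V4SFmode; for DFmode it
   asks for V2DFmode.  If the resulting mode is not supported for vector
   memory operations (VECTOR_MEM_NONE_P), word_mode is returned instead. */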
4930
4931 typedef struct _rs6000_cost_data
4932 {
4933 struct loop *loop_info;
4934 unsigned cost[3];
4935 } rs6000_cost_data;
4936
4937 /* Test for likely overcommitment of vector hardware resources. If a
4938 loop iteration is relatively large, and too large a percentage of
4939 instructions in the loop are vectorized, the cost model may not
4940 adequately reflect delays from unavailable vector resources.
4941 Penalize the loop body cost for this case. */
4942
4943 static void
4944 rs6000_density_test (rs6000_cost_data *data)
4945 {
4946 const int DENSITY_PCT_THRESHOLD = 85;
4947 const int DENSITY_SIZE_THRESHOLD = 70;
4948 const int DENSITY_PENALTY = 10;
4949 struct loop *loop = data->loop_info;
4950 basic_block *bbs = get_loop_body (loop);
4951 int nbbs = loop->num_nodes;
4952 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4953 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4954 int i, density_pct;
4955
4956 for (i = 0; i < nbbs; i++)
4957 {
4958 basic_block bb = bbs[i];
4959 gimple_stmt_iterator gsi;
4960
4961 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4962 {
4963 gimple *stmt = gsi_stmt (gsi);
4964 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4965
4966 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4967 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4968 not_vec_cost++;
4969 }
4970 }
4971
4972 free (bbs);
4973 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4974
4975 if (density_pct > DENSITY_PCT_THRESHOLD
4976 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4977 {
4978 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_NOTE, vect_location,
4981 "density %d%%, cost %d exceeds threshold, penalizing "
4982 "loop body cost by %d%%", density_pct,
4983 vec_cost + not_vec_cost, DENSITY_PENALTY);
4984 }
4985 }
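
/* Worked example of the penalty above (illustrative numbers): with
   vec_cost = 90 and not_vec_cost = 10, density_pct = (90 * 100) / 100 = 90.
   Since 90 > DENSITY_PCT_THRESHOLD and 90 + 10 > DENSITY_SIZE_THRESHOLD,
   the body cost becomes 90 * (100 + 10) / 100 = 99. */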
4986
4987 /* Implement targetm.vectorize.init_cost. */
4988
4989 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
4990 instruction is needed by the vectorization. */
4991 static bool rs6000_vect_nonmem;
4992
4993 static void *
4994 rs6000_init_cost (struct loop *loop_info)
4995 {
4996 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4997 data->loop_info = loop_info;
4998 data->cost[vect_prologue] = 0;
4999 data->cost[vect_body] = 0;
5000 data->cost[vect_epilogue] = 0;
5001 rs6000_vect_nonmem = false;
5002 return data;
5003 }
5004
5005 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5006 For some statements, we would like to further fine-tune the cost on top of
5007 the rs6000_builtin_vectorization_cost handling, which doesn't have any
5008 information on statement operation codes etc. One typical case here is
5009 COND_EXPR: it has the same cost as a simple FXU instruction when evaluated
5010 for scalar cost, but it should be priced higher regardless of whether it is
5011 transformed into compare + branch or compare + isel instructions. */
5012
5013 static unsigned
5014 adjust_vectorization_cost (enum vect_cost_for_stmt kind,
5015 struct _stmt_vec_info *stmt_info)
5016 {
5017 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5018 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5019 {
5020 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5021 if (subcode == COND_EXPR)
5022 return 2;
5023 }
5024
5025 return 0;
5026 }
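
/* For instance (a sketch, assuming the base scalar_stmt cost of 1 from
   rs6000_builtin_vectorization_cost): a scalar COND_EXPR assignment is
   costed 1 + 2 = 3 by rs6000_add_stmt_cost below, while a plain scalar
   add stays at 1. */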
5027
5028 /* Implement targetm.vectorize.add_stmt_cost. */
5029
5030 static unsigned
5031 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5032 struct _stmt_vec_info *stmt_info, int misalign,
5033 enum vect_cost_model_location where)
5034 {
5035 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5036 unsigned retval = 0;
5037
5038 if (flag_vect_cost_model)
5039 {
5040 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5041 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5042 misalign);
5043 stmt_cost += adjust_vectorization_cost (kind, stmt_info);
5044 /* Statements in an inner loop relative to the loop being
5045 vectorized are weighted more heavily. The value here is
5046 arbitrary and could potentially be improved with analysis. */
5047 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5048 count *= 50; /* FIXME. */
5049
5050 retval = (unsigned) (count * stmt_cost);
5051 cost_data->cost[where] += retval;
5052
5053 /* Check whether we're doing something other than just a copy loop.
5054 Not all such loops may be profitably vectorized; see
5055 rs6000_finish_cost. */
5056 if ((kind == vec_to_scalar || kind == vec_perm
5057 || kind == vec_promote_demote || kind == vec_construct
5058 || kind == scalar_to_vec)
5059 || (where == vect_body && kind == vector_stmt))
5060 rs6000_vect_nonmem = true;
5061 }
5062
5063 return retval;
5064 }
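
/* Example of the inner-loop weighting above (illustrative): a vector_stmt
   of cost 1 with count 1 inside a loop nested within the loop being
   vectorized accumulates as 1 * 50 = 50 in the body cost, which strongly
   discourages outer-loop vectorization. */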
5065
5066 /* Implement targetm.vectorize.finish_cost. */
5067
5068 static void
5069 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5070 unsigned *body_cost, unsigned *epilogue_cost)
5071 {
5072 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5073
5074 if (cost_data->loop_info)
5075 rs6000_density_test (cost_data);
5076
5077 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5078 that require versioning for any reason. The vectorization is at
5079 best a wash inside the loop, and the versioning checks make
5080 profitability highly unlikely and potentially quite harmful. */
5081 if (cost_data->loop_info)
5082 {
5083 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5084 if (!rs6000_vect_nonmem
5085 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5086 && LOOP_REQUIRES_VERSIONING (vec_info))
5087 cost_data->cost[vect_body] += 10000;
5088 }
5089
5090 *prologue_cost = cost_data->cost[vect_prologue];
5091 *body_cost = cost_data->cost[vect_body];
5092 *epilogue_cost = cost_data->cost[vect_epilogue];
5093 }
5094
5095 /* Implement targetm.vectorize.destroy_cost_data. */
5096
5097 static void
5098 rs6000_destroy_cost_data (void *data)
5099 {
5100 free (data);
5101 }
5102
5103 /* Implement targetm.loop_unroll_adjust. */
5104
5105 static unsigned
5106 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5107 {
5108 if (unroll_only_small_loops)
5109 {
5110 /* TODO: This is hardcoded to 10 right now. It can be refined, for
5111 example we may want to unroll very small loops more times (4 perhaps).
5112 We also should use a PARAM for this. */
5113 if (loop->ninsns <= 10)
5114 return MIN (2, nunroll);
5115 else
5116 return 0;
5117 }
5118
5119 return nunroll;
5120 }
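
/* So with -munroll-only-small-loops, a loop of at most 10 insns may be
   unrolled at most twice and anything larger is not unrolled at all,
   e.g. loop->ninsns == 10 gives MIN (2, nunroll) while ninsns == 11
   gives 0; without the option, NUNROLL is passed through unchanged. */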
5121
5122 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5123 library with vectorized intrinsics. */
5124
5125 static tree
5126 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5127 tree type_in)
5128 {
5129 char name[32];
5130 const char *suffix = NULL;
5131 tree fntype, new_fndecl, bdecl = NULL_TREE;
5132 int n_args = 1;
5133 const char *bname;
5134 machine_mode el_mode, in_mode;
5135 int n, in_n;
5136
5137 /* Libmass is suitable for unsafe math only as it does not correctly support
5138 parts of IEEE with the required precision such as denormals. Only support
5139 it if we have VSX to use the simd d2 or f4 functions.
5140 XXX: Add variable length support. */
5141 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5142 return NULL_TREE;
5143
5144 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5145 n = TYPE_VECTOR_SUBPARTS (type_out);
5146 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5147 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5148 if (el_mode != in_mode
5149 || n != in_n)
5150 return NULL_TREE;
5151
5152 switch (fn)
5153 {
5154 CASE_CFN_ATAN2:
5155 CASE_CFN_HYPOT:
5156 CASE_CFN_POW:
5157 n_args = 2;
5158 gcc_fallthrough ();
5159
5160 CASE_CFN_ACOS:
5161 CASE_CFN_ACOSH:
5162 CASE_CFN_ASIN:
5163 CASE_CFN_ASINH:
5164 CASE_CFN_ATAN:
5165 CASE_CFN_ATANH:
5166 CASE_CFN_CBRT:
5167 CASE_CFN_COS:
5168 CASE_CFN_COSH:
5169 CASE_CFN_ERF:
5170 CASE_CFN_ERFC:
5171 CASE_CFN_EXP2:
5172 CASE_CFN_EXP:
5173 CASE_CFN_EXPM1:
5174 CASE_CFN_LGAMMA:
5175 CASE_CFN_LOG10:
5176 CASE_CFN_LOG1P:
5177 CASE_CFN_LOG2:
5178 CASE_CFN_LOG:
5179 CASE_CFN_SIN:
5180 CASE_CFN_SINH:
5181 CASE_CFN_SQRT:
5182 CASE_CFN_TAN:
5183 CASE_CFN_TANH:
5184 if (el_mode == DFmode && n == 2)
5185 {
5186 bdecl = mathfn_built_in (double_type_node, fn);
5187 suffix = "d2"; /* pow -> powd2 */
5188 }
5189 else if (el_mode == SFmode && n == 4)
5190 {
5191 bdecl = mathfn_built_in (float_type_node, fn);
5192 suffix = "4"; /* powf -> powf4 */
5193 }
5194 else
5195 return NULL_TREE;
5196 if (!bdecl)
5197 return NULL_TREE;
5198 break;
5199
5200 default:
5201 return NULL_TREE;
5202 }
5203
5204 gcc_assert (suffix != NULL);
5205 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5206 if (!bname)
5207 return NULL_TREE;
5208
5209 strcpy (name, bname + sizeof ("__builtin_") - 1);
5210 strcat (name, suffix);
5211
5212 if (n_args == 1)
5213 fntype = build_function_type_list (type_out, type_in, NULL);
5214 else if (n_args == 2)
5215 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5216 else
5217 gcc_unreachable ();
5218
5219 /* Build a function declaration for the vectorized function. */
5220 new_fndecl = build_decl (BUILTINS_LOCATION,
5221 FUNCTION_DECL, get_identifier (name), fntype);
5222 TREE_PUBLIC (new_fndecl) = 1;
5223 DECL_EXTERNAL (new_fndecl) = 1;
5224 DECL_IS_NOVOPS (new_fndecl) = 1;
5225 TREE_READONLY (new_fndecl) = 1;
5226
5227 return new_fndecl;
5228 }
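
/* Name-mangling sketch for the code above (illustrative): vectorizing pow
   for V2DFmode finds bdecl "__builtin_pow", strips the "__builtin_"
   prefix and appends "d2", declaring the external MASS routine powd2;
   powf for V4SFmode likewise becomes powf4, taking and returning the
   vector types TYPE_IN and TYPE_OUT. */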
5229
5230 /* Returns a function decl for a vectorized version of the builtin function
5231 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5232 if it is not available. */
5233
5234 static tree
5235 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5236 tree type_in)
5237 {
5238 machine_mode in_mode, out_mode;
5239 int in_n, out_n;
5240
5241 if (TARGET_DEBUG_BUILTIN)
5242 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5243 combined_fn_name (combined_fn (fn)),
5244 GET_MODE_NAME (TYPE_MODE (type_out)),
5245 GET_MODE_NAME (TYPE_MODE (type_in)));
5246
5247 if (TREE_CODE (type_out) != VECTOR_TYPE
5248 || TREE_CODE (type_in) != VECTOR_TYPE)
5249 return NULL_TREE;
5250
5251 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5252 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5253 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5254 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5255
5256 switch (fn)
5257 {
5258 CASE_CFN_COPYSIGN:
5259 if (VECTOR_UNIT_VSX_P (V2DFmode)
5260 && out_mode == DFmode && out_n == 2
5261 && in_mode == DFmode && in_n == 2)
5262 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5263 if (VECTOR_UNIT_VSX_P (V4SFmode)
5264 && out_mode == SFmode && out_n == 4
5265 && in_mode == SFmode && in_n == 4)
5266 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5267 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5268 && out_mode == SFmode && out_n == 4
5269 && in_mode == SFmode && in_n == 4)
5270 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5271 break;
5272 CASE_CFN_CEIL:
5273 if (VECTOR_UNIT_VSX_P (V2DFmode)
5274 && out_mode == DFmode && out_n == 2
5275 && in_mode == DFmode && in_n == 2)
5276 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5277 if (VECTOR_UNIT_VSX_P (V4SFmode)
5278 && out_mode == SFmode && out_n == 4
5279 && in_mode == SFmode && in_n == 4)
5280 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5281 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5282 && out_mode == SFmode && out_n == 4
5283 && in_mode == SFmode && in_n == 4)
5284 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5285 break;
5286 CASE_CFN_FLOOR:
5287 if (VECTOR_UNIT_VSX_P (V2DFmode)
5288 && out_mode == DFmode && out_n == 2
5289 && in_mode == DFmode && in_n == 2)
5290 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5291 if (VECTOR_UNIT_VSX_P (V4SFmode)
5292 && out_mode == SFmode && out_n == 4
5293 && in_mode == SFmode && in_n == 4)
5294 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5295 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5296 && out_mode == SFmode && out_n == 4
5297 && in_mode == SFmode && in_n == 4)
5298 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5299 break;
5300 CASE_CFN_FMA:
5301 if (VECTOR_UNIT_VSX_P (V2DFmode)
5302 && out_mode == DFmode && out_n == 2
5303 && in_mode == DFmode && in_n == 2)
5304 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5305 if (VECTOR_UNIT_VSX_P (V4SFmode)
5306 && out_mode == SFmode && out_n == 4
5307 && in_mode == SFmode && in_n == 4)
5308 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5309 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5310 && out_mode == SFmode && out_n == 4
5311 && in_mode == SFmode && in_n == 4)
5312 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5313 break;
5314 CASE_CFN_TRUNC:
5315 if (VECTOR_UNIT_VSX_P (V2DFmode)
5316 && out_mode == DFmode && out_n == 2
5317 && in_mode == DFmode && in_n == 2)
5318 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5319 if (VECTOR_UNIT_VSX_P (V4SFmode)
5320 && out_mode == SFmode && out_n == 4
5321 && in_mode == SFmode && in_n == 4)
5322 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5323 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5324 && out_mode == SFmode && out_n == 4
5325 && in_mode == SFmode && in_n == 4)
5326 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5327 break;
5328 CASE_CFN_NEARBYINT:
5329 if (VECTOR_UNIT_VSX_P (V2DFmode)
5330 && flag_unsafe_math_optimizations
5331 && out_mode == DFmode && out_n == 2
5332 && in_mode == DFmode && in_n == 2)
5333 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5334 if (VECTOR_UNIT_VSX_P (V4SFmode)
5335 && flag_unsafe_math_optimizations
5336 && out_mode == SFmode && out_n == 4
5337 && in_mode == SFmode && in_n == 4)
5338 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5339 break;
5340 CASE_CFN_RINT:
5341 if (VECTOR_UNIT_VSX_P (V2DFmode)
5342 && !flag_trapping_math
5343 && out_mode == DFmode && out_n == 2
5344 && in_mode == DFmode && in_n == 2)
5345 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5346 if (VECTOR_UNIT_VSX_P (V4SFmode)
5347 && !flag_trapping_math
5348 && out_mode == SFmode && out_n == 4
5349 && in_mode == SFmode && in_n == 4)
5350 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5351 break;
5352 default:
5353 break;
5354 }
5355
5356 /* Generate calls to libmass if appropriate. */
5357 if (rs6000_veclib_handler)
5358 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5359
5360 return NULL_TREE;
5361 }
5362
5363 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5364
5365 static tree
5366 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5367 tree type_in)
5368 {
5369 machine_mode in_mode, out_mode;
5370 int in_n, out_n;
5371
5372 if (TARGET_DEBUG_BUILTIN)
5373 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5374 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5375 GET_MODE_NAME (TYPE_MODE (type_out)),
5376 GET_MODE_NAME (TYPE_MODE (type_in)));
5377
5378 if (TREE_CODE (type_out) != VECTOR_TYPE
5379 || TREE_CODE (type_in) != VECTOR_TYPE)
5380 return NULL_TREE;
5381
5382 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5383 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5384 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5385 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5386
5387 enum rs6000_builtins fn
5388 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5389 switch (fn)
5390 {
5391 case RS6000_BUILTIN_RSQRTF:
5392 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5393 && out_mode == SFmode && out_n == 4
5394 && in_mode == SFmode && in_n == 4)
5395 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5396 break;
5397 case RS6000_BUILTIN_RSQRT:
5398 if (VECTOR_UNIT_VSX_P (V2DFmode)
5399 && out_mode == DFmode && out_n == 2
5400 && in_mode == DFmode && in_n == 2)
5401 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5402 break;
5403 case RS6000_BUILTIN_RECIPF:
5404 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5405 && out_mode == SFmode && out_n == 4
5406 && in_mode == SFmode && in_n == 4)
5407 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5408 break;
5409 case RS6000_BUILTIN_RECIP:
5410 if (VECTOR_UNIT_VSX_P (V2DFmode)
5411 && out_mode == DFmode && out_n == 2
5412 && in_mode == DFmode && in_n == 2)
5413 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5414 break;
5415 default:
5416 break;
5417 }
5418 return NULL_TREE;
5419 }
5420 \f
5421 /* Default CPU string for rs6000*_file_start functions. */
5422 static const char *rs6000_default_cpu;
5423
5424 #ifdef USING_ELFOS_H
5425 const char *rs6000_machine;
5426
5427 const char *
5428 rs6000_machine_from_flags (void)
5429 {
5430 HOST_WIDE_INT flags = rs6000_isa_flags;
5431
5432 /* Disable the flags that should never influence the .machine selection. */
5433 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5434
5435 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5436 return "future";
5437 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5438 return "power9";
5439 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5440 return "power8";
5441 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5442 return "power7";
5443 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5444 return "power6";
5445 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5446 return "power5";
5447 if ((flags & ISA_2_1_MASKS) != 0)
5448 return "power4";
5449 if ((flags & OPTION_MASK_POWERPC64) != 0)
5450 return "ppc64";
5451 return "ppc";
5452 }
5453
5454 void
5455 emit_asm_machine (void)
5456 {
5457 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5458 }
5459 #endif
5460
5461 /* Do anything needed at the start of the asm file. */
5462
5463 static void
5464 rs6000_file_start (void)
5465 {
5466 char buffer[80];
5467 const char *start = buffer;
5468 FILE *file = asm_out_file;
5469
5470 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5471
5472 default_file_start ();
5473
5474 if (flag_verbose_asm)
5475 {
5476 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5477
5478 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5479 {
5480 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5481 start = "";
5482 }
5483
5484 if (global_options_set.x_rs6000_cpu_index)
5485 {
5486 fprintf (file, "%s -mcpu=%s", start,
5487 processor_target_table[rs6000_cpu_index].name);
5488 start = "";
5489 }
5490
5491 if (global_options_set.x_rs6000_tune_index)
5492 {
5493 fprintf (file, "%s -mtune=%s", start,
5494 processor_target_table[rs6000_tune_index].name);
5495 start = "";
5496 }
5497
5498 if (PPC405_ERRATUM77)
5499 {
5500 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5501 start = "";
5502 }
5503
5504 #ifdef USING_ELFOS_H
5505 switch (rs6000_sdata)
5506 {
5507 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5508 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5509 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5510 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5511 }
5512
5513 if (rs6000_sdata && g_switch_value)
5514 {
5515 fprintf (file, "%s -G %d", start,
5516 g_switch_value);
5517 start = "";
5518 }
5519 #endif
5520
5521 if (*start == '\0')
5522 putc ('\n', file);
5523 }
5524
5525 #ifdef USING_ELFOS_H
5526 rs6000_machine = rs6000_machine_from_flags ();
5527 emit_asm_machine ();
5528 #endif
5529
5530 if (DEFAULT_ABI == ABI_ELFv2)
5531 fprintf (file, "\t.abiversion 2\n");
5532 }
5533
5534 \f
5535 /* Return nonzero if this function is known to have a null epilogue. */
5536
5537 int
5538 direct_return (void)
5539 {
5540 if (reload_completed)
5541 {
5542 rs6000_stack_t *info = rs6000_stack_info ();
5543
5544 if (info->first_gp_reg_save == 32
5545 && info->first_fp_reg_save == 64
5546 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5547 && ! info->lr_save_p
5548 && ! info->cr_save_p
5549 && info->vrsave_size == 0
5550 && ! info->push_p)
5551 return 1;
5552 }
5553
5554 return 0;
5555 }
5556
5557 /* Helper for num_insns_constant. Calculate number of instructions to
5558 load VALUE to a single gpr using combinations of addi, addis, ori,
5559 oris and sldi instructions. */
5560
5561 static int
5562 num_insns_constant_gpr (HOST_WIDE_INT value)
5563 {
5564 /* signed constant loadable with addi */
5565 if (SIGNED_INTEGER_16BIT_P (value))
5566 return 1;
5567
5568 /* constant loadable with addis */
5569 else if ((value & 0xffff) == 0
5570 && (value >> 31 == -1 || value >> 31 == 0))
5571 return 1;
5572
5573 /* PADDI can support up to 34 bit signed integers. */
5574 else if (TARGET_PREFIXED_ADDR && SIGNED_INTEGER_34BIT_P (value))
5575 return 1;
5576
5577 else if (TARGET_POWERPC64)
5578 {
5579 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5580 HOST_WIDE_INT high = value >> 31;
5581
5582 if (high == 0 || high == -1)
5583 return 2;
5584
5585 high >>= 1;
5586
5587 if (low == 0)
5588 return num_insns_constant_gpr (high) + 1;
5589 else if (high == 0)
5590 return num_insns_constant_gpr (low) + 1;
5591 else
5592 return (num_insns_constant_gpr (high)
5593 + num_insns_constant_gpr (low) + 1);
5594 }
5595
5596 else
5597 return 2;
5598 }
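
/* Worked examples (illustrative): 0x7fff fits in a signed 16-bit
   immediate, so 1 insn (li); 0x12345678 needs lis + ori, so 2 (or 1 with
   paddi when prefixed addressing is enabled); 0x1234567800000000 on a
   64-bit target has a zero low word and recurses on the shifted high
   part 0x12345678 (2 insns), giving 2 + 1 = 3 (lis, ori, sldi). */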
5599
5600 /* Helper for num_insns_constant. Allow constants formed by the
5601 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5602 and handle modes that require multiple gprs. */
5603
5604 static int
5605 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5606 {
5607 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5608 int total = 0;
5609 while (nregs-- > 0)
5610 {
5611 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5612 int insns = num_insns_constant_gpr (low);
5613 if (insns > 2
5614 /* We won't get more than 2 from num_insns_constant_gpr
5615 except when TARGET_POWERPC64 and mode is DImode or
5616 wider, so the register mode must be DImode. */
5617 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5618 insns = 2;
5619 total += insns;
5620 value >>= BITS_PER_WORD;
5621 }
5622 return total;
5623 }
5624
5625 /* Return the number of instructions it takes to form a constant in as
5626 many gprs as are needed for MODE. */
5627
5628 int
5629 num_insns_constant (rtx op, machine_mode mode)
5630 {
5631 HOST_WIDE_INT val;
5632
5633 switch (GET_CODE (op))
5634 {
5635 case CONST_INT:
5636 val = INTVAL (op);
5637 break;
5638
5639 case CONST_WIDE_INT:
5640 {
5641 int insns = 0;
5642 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5643 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5644 DImode);
5645 return insns;
5646 }
5647
5648 case CONST_DOUBLE:
5649 {
5650 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5651
5652 if (mode == SFmode || mode == SDmode)
5653 {
5654 long l;
5655
5656 if (mode == SDmode)
5657 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5658 else
5659 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5660 /* See the first define_split in rs6000.md handling a
5661 const_double_operand. */
5662 val = l;
5663 mode = SImode;
5664 }
5665 else if (mode == DFmode || mode == DDmode)
5666 {
5667 long l[2];
5668
5669 if (mode == DDmode)
5670 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5671 else
5672 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5673
5674 /* See the second (32-bit) and third (64-bit) define_split
5675 in rs6000.md handling a const_double_operand. */
5676 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5677 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5678 mode = DImode;
5679 }
5680 else if (mode == TFmode || mode == TDmode
5681 || mode == KFmode || mode == IFmode)
5682 {
5683 long l[4];
5684 int insns;
5685
5686 if (mode == TDmode)
5687 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5688 else
5689 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5690
5691 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5692 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5693 insns = num_insns_constant_multi (val, DImode);
5694 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5695 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5696 insns += num_insns_constant_multi (val, DImode);
5697 return insns;
5698 }
5699 else
5700 gcc_unreachable ();
5701 }
5702 break;
5703
5704 default:
5705 gcc_unreachable ();
5706 }
5707
5708 return num_insns_constant_multi (val, mode);
5709 }
5710
5711 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5712 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5713 corresponding element of the vector, but for V4SFmode, the
5714 corresponding "float" is interpreted as an SImode integer. */
5715
5716 HOST_WIDE_INT
5717 const_vector_elt_as_int (rtx op, unsigned int elt)
5718 {
5719 rtx tmp;
5720
5721 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5722 gcc_assert (GET_MODE (op) != V2DImode
5723 && GET_MODE (op) != V2DFmode);
5724
5725 tmp = CONST_VECTOR_ELT (op, elt);
5726 if (GET_MODE (op) == V4SFmode)
5727 tmp = gen_lowpart (SImode, tmp);
5728 return INTVAL (tmp);
5729 }
5730
5731 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5732 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5733 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5734 all items are set to the same value and contain COPIES replicas of the
5735 vsplt's operand; if STEP > 1, one in every STEP elements is set to the vsplt's
5736 operand and the others are set to the value of the operand's msb. */
5737
5738 static bool
5739 vspltis_constant (rtx op, unsigned step, unsigned copies)
5740 {
5741 machine_mode mode = GET_MODE (op);
5742 machine_mode inner = GET_MODE_INNER (mode);
5743
5744 unsigned i;
5745 unsigned nunits;
5746 unsigned bitsize;
5747 unsigned mask;
5748
5749 HOST_WIDE_INT val;
5750 HOST_WIDE_INT splat_val;
5751 HOST_WIDE_INT msb_val;
5752
5753 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5754 return false;
5755
5756 nunits = GET_MODE_NUNITS (mode);
5757 bitsize = GET_MODE_BITSIZE (inner);
5758 mask = GET_MODE_MASK (inner);
5759
5760 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5761 splat_val = val;
5762 msb_val = val >= 0 ? 0 : -1;
5763
5764 /* Construct the value to be splatted, if possible. If not, return 0. */
5765 for (i = 2; i <= copies; i *= 2)
5766 {
5767 HOST_WIDE_INT small_val;
5768 bitsize /= 2;
5769 small_val = splat_val >> bitsize;
5770 mask >>= bitsize;
5771 if (splat_val != ((HOST_WIDE_INT)
5772 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5773 | (small_val & mask)))
5774 return false;
5775 splat_val = small_val;
5776 }
5777
5778 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5779 if (EASY_VECTOR_15 (splat_val))
5780 ;
5781
5782 /* Also check if we can splat, and then add the result to itself. Do so if
5783 the value is positive, or if the splat instruction is using OP's mode;
5784 for splat_val < 0, the splat and the add should use the same mode. */
5785 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5786 && (splat_val >= 0 || (step == 1 && copies == 1)))
5787 ;
5788
5789 /* Also check if we are loading up the most significant bit, which can be
5790 done by loading up -1 and shifting the value left by -1. */
5791 else if (EASY_VECTOR_MSB (splat_val, inner))
5792 ;
5793
5794 else
5795 return false;
5796
5797 /* Check if VAL is present in every STEP-th element, and the
5798 other elements are filled with its most significant bit. */
5799 for (i = 1; i < nunits; ++i)
5800 {
5801 HOST_WIDE_INT desired_val;
5802 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5803 if ((i & (step - 1)) == 0)
5804 desired_val = val;
5805 else
5806 desired_val = msb_val;
5807
5808 if (desired_val != const_vector_elt_as_int (op, elt))
5809 return false;
5810 }
5811
5812 return true;
5813 }
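
/* Two illustrative cases (not from the original source): a V16QImode
   vector with every byte 3 succeeds with STEP = 1, COPIES = 1 (vspltisb
   3), while a V8HImode vector of halfwords 0x0101 succeeds with
   COPIES = 2, since each element is two replicas of the byte 1 and can
   therefore be generated as vspltisb 1. */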
5814
5815 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5816 instruction, filling in the bottom elements with 0 or -1.
5817
5818 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5819 for the number of zeroes to shift in, or negative for the number of 0xff
5820 bytes to shift in.
5821
5822 OP is a CONST_VECTOR. */
5823
5824 int
5825 vspltis_shifted (rtx op)
5826 {
5827 machine_mode mode = GET_MODE (op);
5828 machine_mode inner = GET_MODE_INNER (mode);
5829
5830 unsigned i, j;
5831 unsigned nunits;
5832 unsigned mask;
5833
5834 HOST_WIDE_INT val;
5835
5836 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5837 return 0;
5838
5839 /* We need to create pseudo registers to do the shift, so don't recognize
5840 shift vector constants after reload. */
5841 if (!can_create_pseudo_p ())
5842 return 0;
5843
5844 nunits = GET_MODE_NUNITS (mode);
5845 mask = GET_MODE_MASK (inner);
5846
5847 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5848
5849 /* Check if the value can really be the operand of a vspltis[bhw]. */
5850 if (EASY_VECTOR_15 (val))
5851 ;
5852
5853 /* Also check if we are loading up the most significant bit, which can be done
5854 by loading up -1 and shifting the value left by -1. */
5855 else if (EASY_VECTOR_MSB (val, inner))
5856 ;
5857
5858 else
5859 return 0;
5860
5861 /* Check if VAL is present in every STEP-th element until we find elements
5862 that are 0 or all 1 bits. */
5863 for (i = 1; i < nunits; ++i)
5864 {
5865 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5866 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5867
5868 /* If the value isn't the splat value, check for the remaining elements
5869 being 0/-1. */
5870 if (val != elt_val)
5871 {
5872 if (elt_val == 0)
5873 {
5874 for (j = i+1; j < nunits; ++j)
5875 {
5876 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5877 if (const_vector_elt_as_int (op, elt2) != 0)
5878 return 0;
5879 }
5880
5881 return (nunits - i) * GET_MODE_SIZE (inner);
5882 }
5883
5884 else if ((elt_val & mask) == mask)
5885 {
5886 for (j = i+1; j < nunits; ++j)
5887 {
5888 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5889 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5890 return 0;
5891 }
5892
5893 return -((nunits - i) * GET_MODE_SIZE (inner));
5894 }
5895
5896 else
5897 return 0;
5898 }
5899 }
5900
5901 /* If all elements are equal, we don't need to do VSLDOI. */
5902 return 0;
5903 }
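
/* Example (illustrative): on a big-endian target the V4SImode constant
   { 5, 0, 0, 0 } splats 5 with vspltisw and shifts in 12 zero bytes, so
   this returns (4 - 1) * 4 = 12; were the trailing elements all -1, it
   would return -12 instead. */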
5904
5905
5906 /* Return true if OP is of the given MODE and can be synthesized
5907 with a vspltisb, vspltish or vspltisw. */
5908
5909 bool
5910 easy_altivec_constant (rtx op, machine_mode mode)
5911 {
5912 unsigned step, copies;
5913
5914 if (mode == VOIDmode)
5915 mode = GET_MODE (op);
5916 else if (mode != GET_MODE (op))
5917 return false;
5918
5919 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5920 constants. */
5921 if (mode == V2DFmode)
5922 return zero_constant (op, mode);
5923
5924 else if (mode == V2DImode)
5925 {
5926 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5927 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5928 return false;
5929
5930 if (zero_constant (op, mode))
5931 return true;
5932
5933 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5934 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5935 return true;
5936
5937 return false;
5938 }
5939
5940 /* V1TImode is a special container for TImode. Ignore for now. */
5941 else if (mode == V1TImode)
5942 return false;
5943
5944 /* Start with a vspltisw. */
5945 step = GET_MODE_NUNITS (mode) / 4;
5946 copies = 1;
5947
5948 if (vspltis_constant (op, step, copies))
5949 return true;
5950
5951 /* Then try with a vspltish. */
5952 if (step == 1)
5953 copies <<= 1;
5954 else
5955 step >>= 1;
5956
5957 if (vspltis_constant (op, step, copies))
5958 return true;
5959
5960 /* And finally a vspltisb. */
5961 if (step == 1)
5962 copies <<= 1;
5963 else
5964 step >>= 1;
5965
5966 if (vspltis_constant (op, step, copies))
5967 return true;
5968
5969 if (vspltis_shifted (op) != 0)
5970 return true;
5971
5972 return false;
5973 }
5974
5975 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5976 result is OP. Abort if it is not possible. */
5977
5978 rtx
5979 gen_easy_altivec_constant (rtx op)
5980 {
5981 machine_mode mode = GET_MODE (op);
5982 int nunits = GET_MODE_NUNITS (mode);
5983 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5984 unsigned step = nunits / 4;
5985 unsigned copies = 1;
5986
5987 /* Start with a vspltisw. */
5988 if (vspltis_constant (op, step, copies))
5989 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5990
5991 /* Then try with a vspltish. */
5992 if (step == 1)
5993 copies <<= 1;
5994 else
5995 step >>= 1;
5996
5997 if (vspltis_constant (op, step, copies))
5998 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5999
6000 /* And finally a vspltisb. */
6001 if (step == 1)
6002 copies <<= 1;
6003 else
6004 step >>= 1;
6005
6006 if (vspltis_constant (op, step, copies))
6007 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6008
6009 gcc_unreachable ();
6010 }
6011
6012 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6013 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6014
6015 Return the number of instructions needed (1 or 2) in the location
6016 pointed to by NUM_INSNS_PTR.
6017
6018 Return the constant that is being split via CONSTANT_PTR. */
6019
6020 bool
6021 xxspltib_constant_p (rtx op,
6022 machine_mode mode,
6023 int *num_insns_ptr,
6024 int *constant_ptr)
6025 {
6026 size_t nunits = GET_MODE_NUNITS (mode);
6027 size_t i;
6028 HOST_WIDE_INT value;
6029 rtx element;
6030
6031 /* Set the returned values to out-of-bounds values. */
6032 *num_insns_ptr = -1;
6033 *constant_ptr = 256;
6034
6035 if (!TARGET_P9_VECTOR)
6036 return false;
6037
6038 if (mode == VOIDmode)
6039 mode = GET_MODE (op);
6040
6041 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6042 return false;
6043
6044 /* Handle (vec_duplicate <constant>). */
6045 if (GET_CODE (op) == VEC_DUPLICATE)
6046 {
6047 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6048 && mode != V2DImode)
6049 return false;
6050
6051 element = XEXP (op, 0);
6052 if (!CONST_INT_P (element))
6053 return false;
6054
6055 value = INTVAL (element);
6056 if (!IN_RANGE (value, -128, 127))
6057 return false;
6058 }
6059
6060 /* Handle (const_vector [...]). */
6061 else if (GET_CODE (op) == CONST_VECTOR)
6062 {
6063 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6064 && mode != V2DImode)
6065 return false;
6066
6067 element = CONST_VECTOR_ELT (op, 0);
6068 if (!CONST_INT_P (element))
6069 return false;
6070
6071 value = INTVAL (element);
6072 if (!IN_RANGE (value, -128, 127))
6073 return false;
6074
6075 for (i = 1; i < nunits; i++)
6076 {
6077 element = CONST_VECTOR_ELT (op, i);
6078 if (!CONST_INT_P (element))
6079 return false;
6080
6081 if (value != INTVAL (element))
6082 return false;
6083 }
6084 }
6085
6086 /* Handle integer constants being loaded into the upper part of the VSX
6087 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6088 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6089 else if (CONST_INT_P (op))
6090 {
6091 if (!SCALAR_INT_MODE_P (mode))
6092 return false;
6093
6094 value = INTVAL (op);
6095 if (!IN_RANGE (value, -128, 127))
6096 return false;
6097
6098 if (!IN_RANGE (value, -1, 0))
6099 {
6100 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6101 return false;
6102
6103 if (EASY_VECTOR_15 (value))
6104 return false;
6105 }
6106 }
6107
6108 else
6109 return false;
6110
6111 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6112 sign extend. Special case 0/-1 to allow getting any VSX register instead
6113 of an Altivec register. */
6114 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6115 && EASY_VECTOR_15 (value))
6116 return false;
6117
6118 /* Return # of instructions and the constant byte for XXSPLTIB. */
6119 if (mode == V16QImode)
6120 *num_insns_ptr = 1;
6121
6122 else if (IN_RANGE (value, -1, 0))
6123 *num_insns_ptr = 1;
6124
6125 else
6126 *num_insns_ptr = 2;
6127
6128 *constant_ptr = (int) value;
6129 return true;
6130 }
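
/* Usage sketch (illustrative values): a V16QImode splat of 100 sets
   *num_insns_ptr = 1 and *constant_ptr = 100 (one xxspltib); a V8HImode
   splat of 100 sets *num_insns_ptr = 2 because the xxspltib result must
   also be sign-extended (e.g. via vupkhsb); a V8HImode splat of 5
   returns false, since EASY_VECTOR_15 values prefer vspltish. */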
6131
6132 const char *
6133 output_vec_const_move (rtx *operands)
6134 {
6135 int shift;
6136 machine_mode mode;
6137 rtx dest, vec;
6138
6139 dest = operands[0];
6140 vec = operands[1];
6141 mode = GET_MODE (dest);
6142
6143 if (TARGET_VSX)
6144 {
6145 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6146 int xxspltib_value = 256;
6147 int num_insns = -1;
6148
6149 if (zero_constant (vec, mode))
6150 {
6151 if (TARGET_P9_VECTOR)
6152 return "xxspltib %x0,0";
6153
6154 else if (dest_vmx_p)
6155 return "vspltisw %0,0";
6156
6157 else
6158 return "xxlxor %x0,%x0,%x0";
6159 }
6160
6161 if (all_ones_constant (vec, mode))
6162 {
6163 if (TARGET_P9_VECTOR)
6164 return "xxspltib %x0,255";
6165
6166 else if (dest_vmx_p)
6167 return "vspltisw %0,-1";
6168
6169 else if (TARGET_P8_VECTOR)
6170 return "xxlorc %x0,%x0,%x0";
6171
6172 else
6173 gcc_unreachable ();
6174 }
6175
6176 if (TARGET_P9_VECTOR
6177 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6178 {
6179 if (num_insns == 1)
6180 {
6181 operands[2] = GEN_INT (xxspltib_value & 0xff);
6182 return "xxspltib %x0,%2";
6183 }
6184
6185 return "#";
6186 }
6187 }
6188
6189 if (TARGET_ALTIVEC)
6190 {
6191 rtx splat_vec;
6192
6193 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6194 if (zero_constant (vec, mode))
6195 return "vspltisw %0,0";
6196
6197 if (all_ones_constant (vec, mode))
6198 return "vspltisw %0,-1";
6199
6200 /* Do we need to construct a value using VSLDOI? */
6201 shift = vspltis_shifted (vec);
6202 if (shift != 0)
6203 return "#";
6204
6205 splat_vec = gen_easy_altivec_constant (vec);
6206 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6207 operands[1] = XEXP (splat_vec, 0);
6208 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6209 return "#";
6210
6211 switch (GET_MODE (splat_vec))
6212 {
6213 case E_V4SImode:
6214 return "vspltisw %0,%1";
6215
6216 case E_V8HImode:
6217 return "vspltish %0,%1";
6218
6219 case E_V16QImode:
6220 return "vspltisb %0,%1";
6221
6222 default:
6223 gcc_unreachable ();
6224 }
6225 }
6226
6227 gcc_unreachable ();
6228 }
6229
6230 /* Initialize vector TARGET to VALS. */
6231
6232 void
6233 rs6000_expand_vector_init (rtx target, rtx vals)
6234 {
6235 machine_mode mode = GET_MODE (target);
6236 machine_mode inner_mode = GET_MODE_INNER (mode);
6237 int n_elts = GET_MODE_NUNITS (mode);
6238 int n_var = 0, one_var = -1;
6239 bool all_same = true, all_const_zero = true;
6240 rtx x, mem;
6241 int i;
6242
6243 for (i = 0; i < n_elts; ++i)
6244 {
6245 x = XVECEXP (vals, 0, i);
6246 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6247 ++n_var, one_var = i;
6248 else if (x != CONST0_RTX (inner_mode))
6249 all_const_zero = false;
6250
6251 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6252 all_same = false;
6253 }
6254
6255 if (n_var == 0)
6256 {
6257 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6258 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6259 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6260 {
6261 /* Zero register. */
6262 emit_move_insn (target, CONST0_RTX (mode));
6263 return;
6264 }
6265 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6266 {
6267 /* Splat immediate. */
6268 emit_insn (gen_rtx_SET (target, const_vec));
6269 return;
6270 }
6271 else
6272 {
6273 /* Load from constant pool. */
6274 emit_move_insn (target, const_vec);
6275 return;
6276 }
6277 }
6278
6279 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6280 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6281 {
6282 rtx op[2];
6283 size_t i;
6284 size_t num_elements = all_same ? 1 : 2;
6285 for (i = 0; i < num_elements; i++)
6286 {
6287 op[i] = XVECEXP (vals, 0, i);
6288 /* Just in case there is a SUBREG with a smaller mode, do a
6289 conversion. */
6290 if (GET_MODE (op[i]) != inner_mode)
6291 {
6292 rtx tmp = gen_reg_rtx (inner_mode);
6293 convert_move (tmp, op[i], 0);
6294 op[i] = tmp;
6295 }
6296 /* Allow load with splat double word. */
6297 else if (MEM_P (op[i]))
6298 {
6299 if (!all_same)
6300 op[i] = force_reg (inner_mode, op[i]);
6301 }
6302 else if (!REG_P (op[i]))
6303 op[i] = force_reg (inner_mode, op[i]);
6304 }
6305
6306 if (all_same)
6307 {
6308 if (mode == V2DFmode)
6309 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6310 else
6311 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6312 }
6313 else
6314 {
6315 if (mode == V2DFmode)
6316 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6317 else
6318 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6319 }
6320 return;
6321 }
6322
6323 /* Special case initializing vector int if we are on 64-bit systems with
6324 direct move or we have the ISA 3.0 instructions. */
6325 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6326 && TARGET_DIRECT_MOVE_64BIT)
6327 {
6328 if (all_same)
6329 {
6330 rtx element0 = XVECEXP (vals, 0, 0);
6331 if (MEM_P (element0))
6332 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6333 else
6334 element0 = force_reg (SImode, element0);
6335
6336 if (TARGET_P9_VECTOR)
6337 emit_insn (gen_vsx_splat_v4si (target, element0));
6338 else
6339 {
6340 rtx tmp = gen_reg_rtx (DImode);
6341 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6342 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6343 }
6344 return;
6345 }
6346 else
6347 {
6348 rtx elements[4];
6349 size_t i;
6350
6351 for (i = 0; i < 4; i++)
6352 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6353
6354 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6355 elements[2], elements[3]));
6356 return;
6357 }
6358 }
6359
6360 /* With single precision floating point on VSX, we know that internally
6361 single precision is actually represented as a double, so we either make
6362 2 V2DF vectors and convert these vectors to single precision, or do one
6363 conversion and splat the result to the other elements. */
6364 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6365 {
6366 if (all_same)
6367 {
6368 rtx element0 = XVECEXP (vals, 0, 0);
6369
6370 if (TARGET_P9_VECTOR)
6371 {
6372 if (MEM_P (element0))
6373 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6374
6375 emit_insn (gen_vsx_splat_v4sf (target, element0));
6376 }
6377
6378 else
6379 {
6380 rtx freg = gen_reg_rtx (V4SFmode);
6381 rtx sreg = force_reg (SFmode, element0);
6382 rtx cvt = (TARGET_XSCVDPSPN
6383 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6384 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6385
6386 emit_insn (cvt);
6387 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6388 const0_rtx));
6389 }
6390 }
6391 else
6392 {
6393 rtx dbl_even = gen_reg_rtx (V2DFmode);
6394 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6395 rtx flt_even = gen_reg_rtx (V4SFmode);
6396 rtx flt_odd = gen_reg_rtx (V4SFmode);
6397 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6398 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6399 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6400 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6401
6402 /* Use VMRGEW if we can instead of doing a permute. */
6403 if (TARGET_P8_VECTOR)
6404 {
6405 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6406 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6407 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6408 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6409 if (BYTES_BIG_ENDIAN)
6410 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6411 else
6412 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6413 }
6414 else
6415 {
6416 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6417 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6418 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6419 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6420 rs6000_expand_extract_even (target, flt_even, flt_odd);
6421 }
6422 }
6423 return;
6424 }
6425
6426 /* Special case initializing vector short/char that are splats if we are on
6427 64-bit systems with direct move. */
6428 if (all_same && TARGET_DIRECT_MOVE_64BIT
6429 && (mode == V16QImode || mode == V8HImode))
6430 {
6431 rtx op0 = XVECEXP (vals, 0, 0);
6432 rtx di_tmp = gen_reg_rtx (DImode);
6433
6434 if (!REG_P (op0))
6435 op0 = force_reg (GET_MODE_INNER (mode), op0);
6436
6437 if (mode == V16QImode)
6438 {
6439 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6440 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6441 return;
6442 }
6443
6444 if (mode == V8HImode)
6445 {
6446 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6447 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6448 return;
6449 }
6450 }
6451
6452 /* Store value to stack temp. Load vector element. Splat. However, splat
6453 of 64-bit items is not supported on Altivec. */
6454 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6455 {
6456 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6457 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6458 XVECEXP (vals, 0, 0));
6459 x = gen_rtx_UNSPEC (VOIDmode,
6460 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6461 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6462 gen_rtvec (2,
6463 gen_rtx_SET (target, mem),
6464 x)));
6465 x = gen_rtx_VEC_SELECT (inner_mode, target,
6466 gen_rtx_PARALLEL (VOIDmode,
6467 gen_rtvec (1, const0_rtx)));
6468 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6469 return;
6470 }
6471
6472 /* One field is non-constant. Load constant then overwrite
6473 varying field. */
6474 if (n_var == 1)
6475 {
6476 rtx copy = copy_rtx (vals);
6477
6478 /* Load constant part of vector, substitute neighboring value for
6479 varying element. */
6480 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6481 rs6000_expand_vector_init (target, copy);
6482
6483 /* Insert variable. */
6484 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6485 return;
6486 }
6487
6488 /* Construct the vector in memory one field at a time
6489 and load the whole vector. */
6490 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6491 for (i = 0; i < n_elts; i++)
6492 emit_move_insn (adjust_address_nv (mem, inner_mode,
6493 i * GET_MODE_SIZE (inner_mode)),
6494 XVECEXP (vals, 0, i));
6495 emit_move_insn (target, mem);
6496 }
6497
6498 /* Set field ELT of TARGET to VAL. */
6499
6500 void
6501 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6502 {
6503 machine_mode mode = GET_MODE (target);
6504 machine_mode inner_mode = GET_MODE_INNER (mode);
6505 rtx reg = gen_reg_rtx (mode);
6506 rtx mask, mem, x;
6507 int width = GET_MODE_SIZE (inner_mode);
6508 int i;
6509
6510 val = force_reg (GET_MODE (val), val);
6511
6512 if (VECTOR_MEM_VSX_P (mode))
6513 {
6514 rtx insn = NULL_RTX;
6515 rtx elt_rtx = GEN_INT (elt);
6516
6517 if (mode == V2DFmode)
6518 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6519
6520 else if (mode == V2DImode)
6521 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6522
6523 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6524 {
6525 if (mode == V4SImode)
6526 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6527 else if (mode == V8HImode)
6528 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6529 else if (mode == V16QImode)
6530 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6531 else if (mode == V4SFmode)
6532 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6533 }
6534
6535 if (insn)
6536 {
6537 emit_insn (insn);
6538 return;
6539 }
6540 }
6541
6542 /* Simplify setting single element vectors like V1TImode. */
6543 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6544 {
6545 emit_move_insn (target, gen_lowpart (mode, val));
6546 return;
6547 }
6548
6549 /* Load single variable value. */
6550 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6551 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6552 x = gen_rtx_UNSPEC (VOIDmode,
6553 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6554 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6555 gen_rtvec (2,
6556 gen_rtx_SET (reg, mem),
6557 x)));
6558
6559 /* Linear sequence. */
6560 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6561 for (i = 0; i < 16; ++i)
6562 XVECEXP (mask, 0, i) = GEN_INT (i);
6563
6564 /* Set permute mask to insert element into target. */
6565 for (i = 0; i < width; ++i)
6566 XVECEXP (mask, 0, elt*width + i)
6567 = GEN_INT (i + 0x10);
6568 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6569
6570 if (BYTES_BIG_ENDIAN)
6571 x = gen_rtx_UNSPEC (mode,
6572 gen_rtvec (3, target, reg,
6573 force_reg (V16QImode, x)),
6574 UNSPEC_VPERM);
6575 else
6576 {
6577 if (TARGET_P9_VECTOR)
6578 x = gen_rtx_UNSPEC (mode,
6579 gen_rtvec (3, reg, target,
6580 force_reg (V16QImode, x)),
6581 UNSPEC_VPERMR);
6582 else
6583 {
6584 /* Invert selector. We prefer to generate VNAND on P8 so
6585 that future fusion opportunities can kick in, but must
6586 generate VNOR elsewhere. */
6587 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6588 rtx iorx = (TARGET_P8_VECTOR
6589 ? gen_rtx_IOR (V16QImode, notx, notx)
6590 : gen_rtx_AND (V16QImode, notx, notx));
6591 rtx tmp = gen_reg_rtx (V16QImode);
6592 emit_insn (gen_rtx_SET (tmp, iorx));
6593
6594 /* Permute with operands reversed and adjusted selector. */
6595 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6596 UNSPEC_VPERM);
6597 }
6598 }
6599
6600 emit_insn (gen_rtx_SET (target, x));
6601 }
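
/* Selector sketch for the big-endian path above (illustrative): setting
   element 1 of a V4SImode vector builds the byte permute mask
   { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 }, so the vperm takes
   bytes 4..7 of the result from the register holding the new value and
   all other bytes from the old vector. */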
6602
6603 /* Extract field ELT from VEC into TARGET. */
6604
6605 void
6606 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6607 {
6608 machine_mode mode = GET_MODE (vec);
6609 machine_mode inner_mode = GET_MODE_INNER (mode);
6610 rtx mem;
6611
6612 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6613 {
6614 switch (mode)
6615 {
6616 default:
6617 break;
6618 case E_V1TImode:
6619 emit_move_insn (target, gen_lowpart (TImode, vec));
6620 break;
6621 case E_V2DFmode:
6622 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6623 return;
6624 case E_V2DImode:
6625 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6626 return;
6627 case E_V4SFmode:
6628 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6629 return;
6630 case E_V16QImode:
6631 if (TARGET_DIRECT_MOVE_64BIT)
6632 {
6633 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6634 return;
6635 }
6636 else
6637 break;
6638 case E_V8HImode:
6639 if (TARGET_DIRECT_MOVE_64BIT)
6640 {
6641 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6642 return;
6643 }
6644 else
6645 break;
6646 case E_V4SImode:
6647 if (TARGET_DIRECT_MOVE_64BIT)
6648 {
6649 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6650 return;
6651 }
6652 break;
6653 }
6654 }
6655 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6656 && TARGET_DIRECT_MOVE_64BIT)
6657 {
6658 if (GET_MODE (elt) != DImode)
6659 {
6660 rtx tmp = gen_reg_rtx (DImode);
6661 convert_move (tmp, elt, 0);
6662 elt = tmp;
6663 }
6664 else if (!REG_P (elt))
6665 elt = force_reg (DImode, elt);
6666
6667 switch (mode)
6668 {
6669 case E_V1TImode:
6670 emit_move_insn (target, gen_lowpart (TImode, vec));
6671 return;
6672
6673 case E_V2DFmode:
6674 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6675 return;
6676
6677 case E_V2DImode:
6678 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6679 return;
6680
6681 case E_V4SFmode:
6682 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6683 return;
6684
6685 case E_V4SImode:
6686 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6687 return;
6688
6689 case E_V8HImode:
6690 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6691 return;
6692
6693 case E_V16QImode:
6694 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6695 return;
6696
6697 default:
6698 gcc_unreachable ();
6699 }
6700 }
6701
6702 /* Allocate mode-sized buffer. */
6703 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6704
6705 emit_move_insn (mem, vec);
6706 if (CONST_INT_P (elt))
6707 {
6708 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6709
6710 /* Add offset to field within buffer matching vector element. */
6711 mem = adjust_address_nv (mem, inner_mode,
6712 modulo_elt * GET_MODE_SIZE (inner_mode));
6713 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6714 }
6715 else
6716 {
6717 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6718 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6719 rtx new_addr;
6720
6721 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6722 if (ele_size > 1)
6723 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6724 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6725 new_addr = change_address (mem, inner_mode, new_addr);
6726 emit_move_insn (target, new_addr);
6727 }
6728 }
6729
6730 /* Return the offset within a memory object (MEM) of a vector type to a given
6731 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
6732 the element is constant, we return a constant integer.
6733
6734 Otherwise, we use a base register temporary to calculate the offset after
6735 masking it to fit within the bounds of the vector and scaling it. The
6736 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
6737 built-in function. */
6738
6739 static rtx
6740 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
6741 {
6742 if (CONST_INT_P (element))
6743 return GEN_INT (INTVAL (element) * scalar_size);
6744
6745 /* All insns should use the 'Q' constraint (address is a single register) if
6746 the element number is not a constant. */
6747 rtx addr = XEXP (mem, 0);
6748 gcc_assert (satisfies_constraint_Q (addr));
6749
6750 /* Mask the element to make sure the element number is between 0 and the
6751 maximum number of elements - 1 so that we don't generate an address
6752 outside the vector. */
6753 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
6754 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
6755 emit_insn (gen_rtx_SET (base_tmp, and_op));
6756
6757 /* Shift the element to get the byte offset from the element number. */
6758 int shift = exact_log2 (scalar_size);
6759 gcc_assert (shift >= 0);
6760
6761 if (shift > 0)
6762 {
6763 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
6764 emit_insn (gen_rtx_SET (base_tmp, shift_op));
6765 }
6766
6767 return base_tmp;
6768 }
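
/* In effect this emits, for a variable element number in a V4SImode
   vector (illustrative):

	base_tmp = element & 3;	   -- clamp per the ELFv2 vec_extract rule
	base_tmp = base_tmp << 2;  -- scale by the 4-byte element size

   while a constant element such as 3 simply folds to GEN_INT (12). */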
6769
6770 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6771 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6772 temporary (BASE_TMP) to fixup the address. Return the new memory address
6773 that is valid for reads or writes to a given register (SCALAR_REG).
6774
6775 This function is expected to be called after reload is completed when we are
6776 splitting insns. The temporary BASE_TMP might be set multiple times with
6777 this code. */
6778
6779 rtx
6780 rs6000_adjust_vec_address (rtx scalar_reg,
6781 rtx mem,
6782 rtx element,
6783 rtx base_tmp,
6784 machine_mode scalar_mode)
6785 {
6786 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6787 rtx addr = XEXP (mem, 0);
6788 rtx new_addr;
6789
6790 gcc_assert (!reg_mentioned_p (base_tmp, addr));
6791 gcc_assert (!reg_mentioned_p (base_tmp, element));
6792
6793 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6794 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6795
6796 /* Calculate what we need to add to the address to get the element
6797 address. */
6798 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
6799
6800 /* Create the new address pointing to the element within the vector. If we
6801 are adding 0, we don't have to change the address. */
6802 if (element_offset == const0_rtx)
6803 new_addr = addr;
6804
6805 /* A simple indirect address can be converted into a reg + offset
6806 address. */
6807 else if (REG_P (addr) || SUBREG_P (addr))
6808 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6809
6810 /* Optimize D-FORM addresses with constant offset with a constant element, to
6811 include the element offset in the address directly. */
6812 else if (GET_CODE (addr) == PLUS)
6813 {
6814 rtx op0 = XEXP (addr, 0);
6815 rtx op1 = XEXP (addr, 1);
6816
6817 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6818 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6819 {
6820 /* op0 should never be r0, because r0+offset is not valid. But it
6821 doesn't hurt to make sure it is not r0. */
6822 gcc_assert (reg_or_subregno (op0) != 0);
6823
6824 /* D-FORM address with constant element number. */
6825 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6826 rtx offset_rtx = GEN_INT (offset);
6827 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6828 }
6829 else
6830 {
	  /* If we don't have a D-FORM address with a constant element number,
	     add the two terms of the current address first, and then add the
	     element offset.
6833
6834 Previously, we tried to add the offset to OP1 and change the
6835 address to an X-FORM format adding OP0 and BASE_TMP, but it became
6836 complicated because we had to verify that op1 was not GPR0 and we
6837 had a constant element offset (due to the way ADDI is defined).
6838 By doing the add of OP0 and OP1 first, and then adding in the
6839 offset, it has the benefit that if D-FORM instructions are
6840 allowed, the offset is part of the memory access to the vector
6841 element. */
6842 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
6843 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6844 }
6845 }
6846
6847 else
6848 {
6849 emit_move_insn (base_tmp, addr);
6850 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6851 }
6852
6853 /* If the address isn't valid, move the address into the temporary base
6854 register. Some reasons it could not be valid include:
6855
6856 The address offset overflowed the 16 or 34 bit offset size;
6857 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
6858 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
     Only X-FORM loads can be done, and the address is D-FORM.  */
6860
6861 enum insn_form iform
6862 = address_to_insn_form (new_addr, scalar_mode,
6863 reg_to_non_prefixed (scalar_reg, scalar_mode));
6864
6865 if (iform == INSN_FORM_BAD)
6866 {
6867 emit_move_insn (base_tmp, new_addr);
6868 new_addr = base_tmp;
6869 }
6870
6871 return change_address (mem, scalar_mode, new_addr);
6872 }
6873
6874 /* Split a variable vec_extract operation into the component instructions. */
6875
6876 void
6877 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6878 rtx tmp_altivec)
6879 {
6880 machine_mode mode = GET_MODE (src);
6881 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6882 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6883 int byte_shift = exact_log2 (scalar_size);
6884
6885 gcc_assert (byte_shift >= 0);
6886
6887 /* If we are given a memory address, optimize to load just the element. We
6888 don't have to adjust the vector element number on little endian
6889 systems. */
6890 if (MEM_P (src))
6891 {
6892 emit_move_insn (dest,
6893 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
6894 scalar_mode));
6895 return;
6896 }
6897
6898 else if (REG_P (src) || SUBREG_P (src))
6899 {
6900 int num_elements = GET_MODE_NUNITS (mode);
6901 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6902 int bit_shift = 7 - exact_log2 (num_elements);
6903 rtx element2;
6904 unsigned int dest_regno = reg_or_subregno (dest);
6905 unsigned int src_regno = reg_or_subregno (src);
6906 unsigned int element_regno = reg_or_subregno (element);
6907
6908 gcc_assert (REG_P (tmp_gpr));
6909
6910 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6911 a general purpose register. */
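      /* These instructions take a byte offset in a GPR, so a halfword or
	 word element number is first scaled by 2 or 4 with the shifts
	 below, while a byte element number is used directly.  */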
6912 if (TARGET_P9_VECTOR
6913 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
6914 && INT_REGNO_P (dest_regno)
6915 && ALTIVEC_REGNO_P (src_regno)
6916 && INT_REGNO_P (element_regno))
6917 {
6918 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
6919 rtx element_si = gen_rtx_REG (SImode, element_regno);
6920
6921 if (mode == V16QImode)
6922 emit_insn (BYTES_BIG_ENDIAN
6923 ? gen_vextublx (dest_si, element_si, src)
6924 : gen_vextubrx (dest_si, element_si, src));
6925
6926 else if (mode == V8HImode)
6927 {
6928 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6929 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
6930 emit_insn (BYTES_BIG_ENDIAN
6931 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
6932 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
6933 }
6934
6936 else
6937 {
6938 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6939 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
6940 emit_insn (BYTES_BIG_ENDIAN
6941 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
6942 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
6943 }
6944
6945 return;
6946 }
6947
6949 gcc_assert (REG_P (tmp_altivec));
6950
      /* For little endian, adjust the element ordering.  For V2DI/V2DF we
	 can use an XOR; otherwise we need to subtract.  The shift amount is
	 chosen so that VSLO will shift the element into the upper position
	 (adding 3 to a byte shift count converts it into a bit shift
	 count).  */
6955 if (scalar_size == 8)
6956 {
6957 if (!BYTES_BIG_ENDIAN)
6958 {
6959 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
6960 element2 = tmp_gpr;
6961 }
6962 else
6963 element2 = element;
6964
	  /* Generate RLDIC directly to shift left 6 bits and keep just one
	     bit, giving a bit shift of 0 or 64.  */
6967 emit_insn (gen_rtx_SET (tmp_gpr,
6968 gen_rtx_AND (DImode,
6969 gen_rtx_ASHIFT (DImode,
6970 element2,
6971 GEN_INT (6)),
6972 GEN_INT (64))));
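	  /* For example, on big endian element 0 yields a bit shift of 0 and
	     element 1 yields 64; on little endian the mapping is reversed by
	     the XOR above.  */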
6973 }
6974 else
6975 {
6976 if (!BYTES_BIG_ENDIAN)
6977 {
6978 rtx num_ele_m1 = GEN_INT (num_elements - 1);
6979
6980 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
6981 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
6982 element2 = tmp_gpr;
6983 }
6984 else
6985 element2 = element;
6986
6987 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
6988 }
6989
6990 /* Get the value into the lower byte of the Altivec register where VSLO
6991 expects it. */
6992 if (TARGET_P9_VECTOR)
6993 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
6994 else if (can_create_pseudo_p ())
6995 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
6996 else
6997 {
6998 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
6999 emit_move_insn (tmp_di, tmp_gpr);
7000 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7001 }
7002
7003 /* Do the VSLO to get the value into the final location. */
7004 switch (mode)
7005 {
7006 case E_V2DFmode:
7007 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7008 return;
7009
7010 case E_V2DImode:
7011 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7012 return;
7013
7014 case E_V4SFmode:
7015 {
7016 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7017 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7018 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7019 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7020 tmp_altivec));
7021
7022 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7023 return;
7024 }
7025
7026 case E_V4SImode:
7027 case E_V8HImode:
7028 case E_V16QImode:
7029 {
7030 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7031 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7032 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7033 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7034 tmp_altivec));
7035 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7036 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7037 GEN_INT (64 - bits_in_element)));
7038 return;
7039 }
7040
7041 default:
7042 gcc_unreachable ();
7043 }
7044
7045 return;
7046 }
7047 else
7048 gcc_unreachable ();
7049 }
7050
/* Return the alignment of TYPE.  The existing alignment is ALIGN.  HOW
   selects whether the alignment is ABI-mandated, optional, or both.  */
7054
7055 unsigned int
7056 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7057 {
7058 if (how != align_opt)
7059 {
7060 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7061 align = 128;
7062 }
7063
7064 if (how != align_abi)
7065 {
7066 if (TREE_CODE (type) == ARRAY_TYPE
7067 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7068 {
7069 if (align < BITS_PER_WORD)
7070 align = BITS_PER_WORD;
7071 }
7072 }
7073
7074 return align;
7075 }
7076
7077 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7078 instructions simply ignore the low bits; VSX memory instructions
7079 are aligned to 4 or 8 bytes. */
7080
7081 static bool
7082 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7083 {
7084 return (STRICT_ALIGNMENT
7085 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7086 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7087 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7088 && (int) align < VECTOR_ALIGN (mode)))));
7089 }
7090
7091 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7092
7093 bool
7094 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7095 {
7096 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7097 {
7098 if (computed != 128)
7099 {
7100 static bool warned;
7101 if (!warned && warn_psabi)
7102 {
7103 warned = true;
7104 inform (input_location,
7105 "the layout of aggregates containing vectors with"
7106 " %d-byte alignment has changed in GCC 5",
7107 computed / BITS_PER_UNIT);
7108 }
7109 }
7110 /* In current GCC there is no special case. */
7111 return false;
7112 }
7113
7114 return false;
7115 }
7116
7117 /* AIX increases natural record alignment to doubleword if the first
7118 field is an FP double while the FP fields remain word aligned. */
7119
7120 unsigned int
7121 rs6000_special_round_type_align (tree type, unsigned int computed,
7122 unsigned int specified)
7123 {
7124 unsigned int align = MAX (computed, specified);
7125 tree field = TYPE_FIELDS (type);
7126
  /* Skip all non-field decls.  */
7128 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7129 field = DECL_CHAIN (field);
7130
7131 if (field != NULL && field != type)
7132 {
7133 type = TREE_TYPE (field);
7134 while (TREE_CODE (type) == ARRAY_TYPE)
7135 type = TREE_TYPE (type);
7136
7137 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7138 align = MAX (align, 64);
7139 }
7140
7141 return align;
7142 }
7143
7144 /* Darwin increases record alignment to the natural alignment of
7145 the first field. */
7146
7147 unsigned int
7148 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7149 unsigned int specified)
7150 {
7151 unsigned int align = MAX (computed, specified);
7152
7153 if (TYPE_PACKED (type))
7154 return align;
7155
7156 /* Find the first field, looking down into aggregates. */
7157 do {
7158 tree field = TYPE_FIELDS (type);
    /* Skip all non-field decls.  */
7160 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7161 field = DECL_CHAIN (field);
7162 if (! field)
7163 break;
7164 /* A packed field does not contribute any extra alignment. */
7165 if (DECL_PACKED (field))
7166 return align;
7167 type = TREE_TYPE (field);
7168 while (TREE_CODE (type) == ARRAY_TYPE)
7169 type = TREE_TYPE (type);
7170 } while (AGGREGATE_TYPE_P (type));
7171
7172 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7173 align = MAX (align, TYPE_ALIGN (type));
7174
7175 return align;
7176 }
7177
7178 /* Return 1 for an operand in small memory on V.4/eabi. */
7179
7180 int
7181 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7182 machine_mode mode ATTRIBUTE_UNUSED)
7183 {
7184 #if TARGET_ELF
7185 rtx sym_ref;
7186
7187 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7188 return 0;
7189
7190 if (DEFAULT_ABI != ABI_V4)
7191 return 0;
7192
7193 if (SYMBOL_REF_P (op))
7194 sym_ref = op;
7195
7196 else if (GET_CODE (op) != CONST
7197 || GET_CODE (XEXP (op, 0)) != PLUS
7198 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7199 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7200 return 0;
7201
7202 else
7203 {
7204 rtx sum = XEXP (op, 0);
7205 HOST_WIDE_INT summand;
7206
	  /* We have to be careful here, because it is the referenced address
	     that must be within 32k of _SDA_BASE_, not just the symbol.  */
7209 summand = INTVAL (XEXP (sum, 1));
7210 if (summand < 0 || summand > g_switch_value)
7211 return 0;
7212
7213 sym_ref = XEXP (sum, 0);
7214 }
7215
7216 return SYMBOL_REF_SMALL_P (sym_ref);
7217 #else
7218 return 0;
7219 #endif
7220 }
7221
7222 /* Return true if either operand is a general purpose register. */
7223
7224 bool
7225 gpr_or_gpr_p (rtx op0, rtx op1)
7226 {
7227 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7228 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7229 }
7230
7231 /* Return true if this is a move direct operation between GPR registers and
7232 floating point/VSX registers. */
7233
7234 bool
7235 direct_move_p (rtx op0, rtx op1)
7236 {
7237 if (!REG_P (op0) || !REG_P (op1))
7238 return false;
7239
7240 if (!TARGET_DIRECT_MOVE)
7241 return false;
7242
7243 int regno0 = REGNO (op0);
7244 int regno1 = REGNO (op1);
7245 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7246 return false;
7247
7248 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7249 return true;
7250
7251 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7252 return true;
7253
7254 return false;
7255 }
7256
/* Return true if ADDR is an acceptable address for a quad memory operation
   of mode MODE (either LQ/STQ for general purpose registers, or LXV/STXV
   for vector registers under ISA 3.0).  If STRICT, only hard registers are
   accepted as the base register.  */
7262
7263 bool
7264 quad_address_p (rtx addr, machine_mode mode, bool strict)
7265 {
7266 rtx op0, op1;
7267
7268 if (GET_MODE_SIZE (mode) != 16)
7269 return false;
7270
7271 if (legitimate_indirect_address_p (addr, strict))
7272 return true;
7273
7274 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7275 return false;
7276
7277 /* Is this a valid prefixed address? If the bottom four bits of the offset
7278 are non-zero, we could use a prefixed instruction (which does not have the
7279 DQ-form constraint that the traditional instruction had) instead of
7280 forcing the unaligned offset to a GPR. */
7281 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7282 return true;
7283
7284 if (GET_CODE (addr) != PLUS)
7285 return false;
7286
7287 op0 = XEXP (addr, 0);
7288 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7289 return false;
7290
7291 op1 = XEXP (addr, 1);
7292 if (!CONST_INT_P (op1))
7293 return false;
7294
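  /* What remains is a reg + constant address; for the DQ-form used by
     LXV/STXV the 16-bit signed offset must also have its bottom four bits
     clear, so e.g. offsets 0, 16, or -32 are acceptable while 8 is not.  */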
7295 return quad_address_offset_p (INTVAL (op1));
7296 }
7297
7298 /* Return true if this is a load or store quad operation. This function does
7299 not handle the atomic quad memory instructions. */
7300
7301 bool
7302 quad_load_store_p (rtx op0, rtx op1)
7303 {
7304 bool ret;
7305
7306 if (!TARGET_QUAD_MEMORY)
7307 ret = false;
7308
7309 else if (REG_P (op0) && MEM_P (op1))
7310 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7311 && quad_memory_operand (op1, GET_MODE (op1))
7312 && !reg_overlap_mentioned_p (op0, op1));
7313
7314 else if (MEM_P (op0) && REG_P (op1))
7315 ret = (quad_memory_operand (op0, GET_MODE (op0))
7316 && quad_int_reg_operand (op1, GET_MODE (op1)));
7317
7318 else
7319 ret = false;
7320
7321 if (TARGET_DEBUG_ADDR)
7322 {
7323 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7324 ret ? "true" : "false");
7325 debug_rtx (gen_rtx_SET (op0, op1));
7326 }
7327
7328 return ret;
7329 }
7330
7331 /* Given an address, return a constant offset term if one exists. */
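/* For example, (plus (reg) (const_int 8)) yields (const_int 8), as does
   (pre_modify (reg) (plus (reg) (const_int 8))); an address with no
   constant term yields NULL_RTX.  */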
7332
7333 static rtx
7334 address_offset (rtx op)
7335 {
7336 if (GET_CODE (op) == PRE_INC
7337 || GET_CODE (op) == PRE_DEC)
7338 op = XEXP (op, 0);
7339 else if (GET_CODE (op) == PRE_MODIFY
7340 || GET_CODE (op) == LO_SUM)
7341 op = XEXP (op, 1);
7342
7343 if (GET_CODE (op) == CONST)
7344 op = XEXP (op, 0);
7345
7346 if (GET_CODE (op) == PLUS)
7347 op = XEXP (op, 1);
7348
7349 if (CONST_INT_P (op))
7350 return op;
7351
7352 return NULL_RTX;
7353 }
7354
7355 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7356 the mode. If we can't find (or don't know) the alignment of the symbol
7357 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7358 should be pessimistic]. Offsets are validated in the same way as for
7359 reg + offset. */
7360 static bool
7361 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7362 {
  /* We should never get here with a mode that supports DQ-form
     addressing.  */
7364 gcc_checking_assert (! mode_supports_dq_form (mode));
7365
7366 if (GET_CODE (x) == CONST)
7367 x = XEXP (x, 0);
7368
7369 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7370 x = XVECEXP (x, 0, 0);
7371
7372 rtx sym = NULL_RTX;
7373 unsigned HOST_WIDE_INT offset = 0;
7374
7375 if (GET_CODE (x) == PLUS)
7376 {
7377 sym = XEXP (x, 0);
7378 if (! SYMBOL_REF_P (sym))
7379 return false;
7380 if (!CONST_INT_P (XEXP (x, 1)))
7381 return false;
7382 offset = INTVAL (XEXP (x, 1));
7383 }
7384 else if (SYMBOL_REF_P (x))
7385 sym = x;
7386 else if (CONST_INT_P (x))
7387 offset = INTVAL (x);
7388 else if (GET_CODE (x) == LABEL_REF)
7389 offset = 0; // We assume code labels are Pmode aligned
7390 else
    return false; // Unrecognized expression; reject it.
7392
7393 /* If we don't know the alignment of the thing to which the symbol refers,
7394 we assume optimistically it is "enough".
7395 ??? maybe we should be pessimistic instead. */
7396 unsigned align = 0;
7397
7398 if (sym)
7399 {
7400 tree decl = SYMBOL_REF_DECL (sym);
7401 #if TARGET_MACHO
7402 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7403 /* The decl in an indirection symbol is the original one, which might
7404 be less aligned than the indirection. Our indirections are always
7405 pointer-aligned. */
7406 ;
7407 else
7408 #endif
7409 if (decl && DECL_ALIGN (decl))
7410 align = DECL_ALIGN_UNIT (decl);
7411 }
7412
7413 unsigned int extra = 0;
7414 switch (mode)
7415 {
7416 case E_DFmode:
7417 case E_DDmode:
7418 case E_DImode:
7419 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7420 addressing. */
7421 if (VECTOR_MEM_VSX_P (mode))
7422 return false;
7423
7424 if (!TARGET_POWERPC64)
7425 extra = 4;
7426 else if ((offset & 3) || (align & 3))
7427 return false;
7428 break;
7429
7430 case E_TFmode:
7431 case E_IFmode:
7432 case E_KFmode:
7433 case E_TDmode:
7434 case E_TImode:
7435 case E_PTImode:
7436 extra = 8;
7437 if (!TARGET_POWERPC64)
7438 extra = 12;
7439 else if ((offset & 3) || (align & 3))
7440 return false;
7441 break;
7442
7443 default:
7444 break;
7445 }
7446
7447 /* We only care if the access(es) would cause a change to the high part. */
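  /* The idiom below sign-extends the low 16 bits of OFFSET: e.g., 0xfffc
     becomes -4 and 0x8000 becomes -32768, isolating the part of the offset
     that the low half of the address computation actually sees.  */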
7448 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7449 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7450 }
7451
7452 /* Return true if the MEM operand is a memory operand suitable for use
7453 with a (full width, possibly multiple) gpr load/store. On
7454 powerpc64 this means the offset must be divisible by 4.
7455 Implements 'Y' constraint.
7456
7457 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7458 a constraint function we know the operand has satisfied a suitable
7459 memory predicate.
7460
7461 Offsetting a lo_sum should not be allowed, except where we know by
7462 alignment that a 32k boundary is not crossed. Note that by
7463 "offsetting" here we mean a further offset to access parts of the
7464 MEM. It's fine to have a lo_sum where the inner address is offset
7465 from a sym, since the same sym+offset will appear in the high part
7466 of the address calculation. */
7467
7468 bool
7469 mem_operand_gpr (rtx op, machine_mode mode)
7470 {
7471 unsigned HOST_WIDE_INT offset;
7472 int extra;
7473 rtx addr = XEXP (op, 0);
7474
7475 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7476 if (TARGET_UPDATE
7477 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7478 && mode_supports_pre_incdec_p (mode)
7479 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7480 return true;
7481
7482 /* Allow prefixed instructions if supported. If the bottom two bits of the
7483 offset are non-zero, we could use a prefixed instruction (which does not
7484 have the DS-form constraint that the traditional instruction had) instead
7485 of forcing the unaligned offset to a GPR. */
7486 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7487 return true;
7488
7489 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7490 really OK. Doing this early avoids teaching all the other machinery
7491 about them. */
7492 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
7493 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
7494
7495 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7496 if (!rs6000_offsettable_memref_p (op, mode, false))
7497 return false;
7498
7499 op = address_offset (addr);
7500 if (op == NULL_RTX)
7501 return true;
7502
7503 offset = INTVAL (op);
7504 if (TARGET_POWERPC64 && (offset & 3) != 0)
7505 return false;
7506
7507 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7508 if (extra < 0)
7509 extra = 0;
7510
7511 if (GET_CODE (addr) == LO_SUM)
7512 /* For lo_sum addresses, we must allow any offset except one that
7513 causes a wrap, so test only the low 16 bits. */
7514 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7515
7516 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7517 }
7518
7519 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7520 enforce an offset divisible by 4 even for 32-bit. */
7521
7522 bool
7523 mem_operand_ds_form (rtx op, machine_mode mode)
7524 {
7525 unsigned HOST_WIDE_INT offset;
7526 int extra;
7527 rtx addr = XEXP (op, 0);
7528
7529 /* Allow prefixed instructions if supported. If the bottom two bits of the
7530 offset are non-zero, we could use a prefixed instruction (which does not
7531 have the DS-form constraint that the traditional instruction had) instead
7532 of forcing the unaligned offset to a GPR. */
7533 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7534 return true;
7535
7536 if (!offsettable_address_p (false, mode, addr))
7537 return false;
7538
7539 op = address_offset (addr);
7540 if (op == NULL_RTX)
7541 return true;
7542
7543 offset = INTVAL (op);
7544 if ((offset & 3) != 0)
7545 return false;
7546
7547 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7548 if (extra < 0)
7549 extra = 0;
7550
7551 if (GET_CODE (addr) == LO_SUM)
7552 /* For lo_sum addresses, we must allow any offset except one that
7553 causes a wrap, so test only the low 16 bits. */
7554 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7555
7556 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7557 }
7558 \f
7559 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7560
7561 static bool
7562 reg_offset_addressing_ok_p (machine_mode mode)
7563 {
7564 switch (mode)
7565 {
7566 case E_V16QImode:
7567 case E_V8HImode:
7568 case E_V4SFmode:
7569 case E_V4SImode:
7570 case E_V2DFmode:
7571 case E_V2DImode:
7572 case E_V1TImode:
7573 case E_TImode:
7574 case E_TFmode:
7575 case E_KFmode:
7576 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7577 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7578 a vector mode, if we want to use the VSX registers to move it around,
7579 we need to restrict ourselves to reg+reg addressing. Similarly for
7580 IEEE 128-bit floating point that is passed in a single vector
7581 register. */
7582 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7583 return mode_supports_dq_form (mode);
7584 break;
7585
7586 case E_SDmode:
7587 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7588 addressing for the LFIWZX and STFIWX instructions. */
7589 if (TARGET_NO_SDMODE_STACK)
7590 return false;
7591 break;
7592
7593 default:
7594 break;
7595 }
7596
7597 return true;
7598 }
7599
7600 static bool
7601 virtual_stack_registers_memory_p (rtx op)
7602 {
7603 int regnum;
7604
7605 if (REG_P (op))
7606 regnum = REGNO (op);
7607
7608 else if (GET_CODE (op) == PLUS
7609 && REG_P (XEXP (op, 0))
7610 && CONST_INT_P (XEXP (op, 1)))
7611 regnum = REGNO (XEXP (op, 0));
7612
7613 else
7614 return false;
7615
7616 return (regnum >= FIRST_VIRTUAL_REGISTER
7617 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7618 }
7619
/* Return true if a MODE-sized memory access to OP plus OFFSET
   is known not to straddle a 32k boundary.  This function is used
7622 to determine whether -mcmodel=medium code can use TOC pointer
7623 relative addressing for OP. This means the alignment of the TOC
7624 pointer must also be taken into account, and unfortunately that is
7625 only 8 bytes. */
7626
7627 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7628 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7629 #endif
7630
7631 static bool
7632 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7633 machine_mode mode)
7634 {
7635 tree decl;
7636 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7637
7638 if (!SYMBOL_REF_P (op))
7639 return false;
7640
7641 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7642 SYMBOL_REF. */
7643 if (mode_supports_dq_form (mode))
7644 return false;
7645
7646 dsize = GET_MODE_SIZE (mode);
7647 decl = SYMBOL_REF_DECL (op);
7648 if (!decl)
7649 {
7650 if (dsize == 0)
7651 return false;
7652
7653 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7654 replacing memory addresses with an anchor plus offset. We
7655 could find the decl by rummaging around in the block->objects
7656 VEC for the given offset but that seems like too much work. */
7657 dalign = BITS_PER_UNIT;
7658 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7659 && SYMBOL_REF_ANCHOR_P (op)
7660 && SYMBOL_REF_BLOCK (op) != NULL)
7661 {
7662 struct object_block *block = SYMBOL_REF_BLOCK (op);
7663
7664 dalign = block->alignment;
7665 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7666 }
7667 else if (CONSTANT_POOL_ADDRESS_P (op))
7668 {
7669 /* It would be nice to have get_pool_align().. */
7670 machine_mode cmode = get_pool_mode (op);
7671
7672 dalign = GET_MODE_ALIGNMENT (cmode);
7673 }
7674 }
7675 else if (DECL_P (decl))
7676 {
7677 dalign = DECL_ALIGN (decl);
7678
7679 if (dsize == 0)
7680 {
7681 /* Allow BLKmode when the entire object is known to not
7682 cross a 32k boundary. */
7683 if (!DECL_SIZE_UNIT (decl))
7684 return false;
7685
7686 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7687 return false;
7688
7689 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7690 if (dsize > 32768)
7691 return false;
7692
7693 dalign /= BITS_PER_UNIT;
7694 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7695 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7696 return dalign >= dsize;
7697 }
7698 }
7699 else
7700 gcc_unreachable ();
7701
7702 /* Find how many bits of the alignment we know for this access. */
7703 dalign /= BITS_PER_UNIT;
7704 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7705 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7706 mask = dalign - 1;
7707 lsb = offset & -offset;
7708 mask &= lsb - 1;
7709 dalign = mask + 1;
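  /* DALIGN is now min (symbol alignment, lowest set bit of OFFSET): e.g.,
     an 8-byte-aligned decl accessed at offset 4 gives lsb = 4, mask = 3,
     and a final DALIGN of 4.  */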
7710
7711 return dalign >= dsize;
7712 }
7713
7714 static bool
7715 constant_pool_expr_p (rtx op)
7716 {
7717 rtx base, offset;
7718
7719 split_const (op, &base, &offset);
7720 return (SYMBOL_REF_P (base)
7721 && CONSTANT_POOL_ADDRESS_P (base)
7722 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7723 }
7724
7725 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7726 use that as the register to put the HIGH value into if register allocation
7727 is already done. */
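/* For -mcmodel=small, or before register allocation, the reference is a
   bare (unspec [SYMBOL (reg 2)] UNSPEC_TOCREL); for the medium and large
   code models after allocation it is split into a HIGH/LO_SUM pair so the
   HIGH part can be set separately, possibly into LARGETOC_REG.  */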
7728
7729 rtx
7730 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7731 {
7732 rtx tocrel, tocreg, hi;
7733
7734 gcc_assert (TARGET_TOC);
7735
7736 if (TARGET_DEBUG_ADDR)
7737 {
7738 if (SYMBOL_REF_P (symbol))
7739 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7740 XSTR (symbol, 0));
7741 else
7742 {
7743 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7744 GET_RTX_NAME (GET_CODE (symbol)));
7745 debug_rtx (symbol);
7746 }
7747 }
7748
7749 if (!can_create_pseudo_p ())
7750 df_set_regs_ever_live (TOC_REGISTER, true);
7751
7752 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7753 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7754 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
7755 return tocrel;
7756
7757 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7758 if (largetoc_reg != NULL)
7759 {
7760 emit_move_insn (largetoc_reg, hi);
7761 hi = largetoc_reg;
7762 }
7763 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
7764 }
7765
7766 /* These are only used to pass through from print_operand/print_operand_address
7767 to rs6000_output_addr_const_extra over the intervening function
7768 output_addr_const which is not target code. */
7769 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7770
7771 /* Return true if OP is a toc pointer relative address (the output
7772 of create_TOC_reference). If STRICT, do not match non-split
7773 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7774 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7775 TOCREL_OFFSET_RET respectively. */
7776
7777 bool
7778 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7779 const_rtx *tocrel_offset_ret)
7780 {
7781 if (!TARGET_TOC)
7782 return false;
7783
7784 if (TARGET_CMODEL != CMODEL_SMALL)
7785 {
7786 /* When strict ensure we have everything tidy. */
7787 if (strict
7788 && !(GET_CODE (op) == LO_SUM
7789 && REG_P (XEXP (op, 0))
7790 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7791 return false;
7792
7793 /* When not strict, allow non-split TOC addresses and also allow
7794 (lo_sum (high ..)) TOC addresses created during reload. */
7795 if (GET_CODE (op) == LO_SUM)
7796 op = XEXP (op, 1);
7797 }
7798
7799 const_rtx tocrel_base = op;
7800 const_rtx tocrel_offset = const0_rtx;
7801
7802 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7803 {
7804 tocrel_base = XEXP (op, 0);
7805 tocrel_offset = XEXP (op, 1);
7806 }
7807
7808 if (tocrel_base_ret)
7809 *tocrel_base_ret = tocrel_base;
7810 if (tocrel_offset_ret)
7811 *tocrel_offset_ret = tocrel_offset;
7812
7813 return (GET_CODE (tocrel_base) == UNSPEC
7814 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7815 && REG_P (XVECEXP (tocrel_base, 0, 1))
7816 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7817 }
7818
7819 /* Return true if X is a constant pool address, and also for cmodel=medium
7820 if X is a toc-relative address known to be offsettable within MODE. */
7821
7822 bool
7823 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7824 bool strict)
7825 {
7826 const_rtx tocrel_base, tocrel_offset;
7827 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7828 && (TARGET_CMODEL != CMODEL_MEDIUM
7829 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7830 || mode == QImode
7831 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7832 INTVAL (tocrel_offset), mode)));
7833 }
7834
7835 static bool
7836 legitimate_small_data_p (machine_mode mode, rtx x)
7837 {
7838 return (DEFAULT_ABI == ABI_V4
7839 && !flag_pic && !TARGET_TOC
7840 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7841 && small_data_operand (x, mode));
7842 }
7843
7844 bool
7845 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7846 bool strict, bool worst_case)
7847 {
7848 unsigned HOST_WIDE_INT offset;
7849 unsigned int extra;
7850
7851 if (GET_CODE (x) != PLUS)
7852 return false;
7853 if (!REG_P (XEXP (x, 0)))
7854 return false;
7855 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7856 return false;
7857 if (mode_supports_dq_form (mode))
7858 return quad_address_p (x, mode, strict);
7859 if (!reg_offset_addressing_ok_p (mode))
7860 return virtual_stack_registers_memory_p (x);
7861 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7862 return true;
7863 if (!CONST_INT_P (XEXP (x, 1)))
7864 return false;
7865
7866 offset = INTVAL (XEXP (x, 1));
7867 extra = 0;
7868 switch (mode)
7869 {
7870 case E_DFmode:
7871 case E_DDmode:
7872 case E_DImode:
7873 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7874 addressing. */
7875 if (VECTOR_MEM_VSX_P (mode))
7876 return false;
7877
7878 if (!worst_case)
7879 break;
7880 if (!TARGET_POWERPC64)
7881 extra = 4;
7882 else if (offset & 3)
7883 return false;
7884 break;
7885
7886 case E_TFmode:
7887 case E_IFmode:
7888 case E_KFmode:
7889 case E_TDmode:
7890 case E_TImode:
7891 case E_PTImode:
7892 extra = 8;
7893 if (!worst_case)
7894 break;
7895 if (!TARGET_POWERPC64)
7896 extra = 12;
7897 else if (offset & 3)
7898 return false;
7899 break;
7900
7901 default:
7902 break;
7903 }
7904
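  /* EXTRA covers the bytes beyond the first register-sized access: e.g.,
     in the worst case a 32-bit DFmode access is split into two 4-byte
     loads, so OFFSET + 4 must be representable as well.  */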
7905 if (TARGET_PREFIXED_ADDR)
7906 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
7907 else
7908 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7909 }
7910
7911 bool
7912 legitimate_indexed_address_p (rtx x, int strict)
7913 {
7914 rtx op0, op1;
7915
7916 if (GET_CODE (x) != PLUS)
7917 return false;
7918
7919 op0 = XEXP (x, 0);
7920 op1 = XEXP (x, 1);
7921
7922 return (REG_P (op0) && REG_P (op1)
7923 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7924 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7925 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7926 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7927 }
7928
7929 bool
7930 avoiding_indexed_address_p (machine_mode mode)
7931 {
7932 /* Avoid indexed addressing for modes that have non-indexed
7933 load/store instruction forms. */
7934 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7935 }
7936
7937 bool
7938 legitimate_indirect_address_p (rtx x, int strict)
7939 {
7940 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
7941 }
7942
7943 bool
7944 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7945 {
7946 if (!TARGET_MACHO || !flag_pic
7947 || mode != SImode || !MEM_P (x))
7948 return false;
7949 x = XEXP (x, 0);
7950
7951 if (GET_CODE (x) != LO_SUM)
7952 return false;
7953 if (!REG_P (XEXP (x, 0)))
7954 return false;
7955 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7956 return false;
7957 x = XEXP (x, 1);
7958
7959 return CONSTANT_P (x);
7960 }
7961
7962 static bool
7963 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7964 {
7965 if (GET_CODE (x) != LO_SUM)
7966 return false;
7967 if (!REG_P (XEXP (x, 0)))
7968 return false;
7969 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7970 return false;
  /* Quad word addresses are restricted, and we can't use LO_SUM.  */
7972 if (mode_supports_dq_form (mode))
7973 return false;
7974 x = XEXP (x, 1);
7975
7976 if (TARGET_ELF || TARGET_MACHO)
7977 {
7978 bool large_toc_ok;
7979
7980 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7981 return false;
7982 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
7983 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
7984 recognizes some LO_SUM addresses as valid although this
	 function says the opposite.  In most cases, LRA through different
7986 transformations can generate correct code for address reloads.
7987 It cannot manage only some LO_SUM cases. So we need to add
7988 code here saying that some addresses are still valid. */
7989 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7990 && small_toc_ref (x, VOIDmode));
7991 if (TARGET_TOC && ! large_toc_ok)
7992 return false;
7993 if (GET_MODE_NUNITS (mode) != 1)
7994 return false;
7995 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7996 && !(/* ??? Assume floating point reg based on mode? */
7997 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
7998 return false;
7999
8000 return CONSTANT_P (x) || large_toc_ok;
8001 }
8002
8003 return false;
8004 }
8005
8006
8007 /* Try machine-dependent ways of modifying an illegitimate address
8008 to be legitimate. If we find one, return the new, valid address.
8009 This is used from only one place: `memory_address' in explow.c.
8010
8011 OLDX is the address as it was before break_out_memory_refs was
8012 called. In some cases it is useful to look at this to decide what
8013 needs to be done.
8014
8015 It is always safe for this function to do nothing. It exists to
8016 recognize opportunities to optimize the output.
8017
8018 On RS/6000, first check for the sum of a register with a constant
8019 integer that is out of range. If so, generate code to add the
8020 constant with the low-order 16 bits masked to the register and force
8021 this result into another register (this can be done with `cau').
8022 Then generate an address of REG+(CONST&0xffff), allowing for the
8023 possibility of bit 16 being a one.
8024
8025 Then check for the sum of a register and something not constant, try to
8026 load the other things into a register and return the sum. */
8027
8028 static rtx
8029 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8030 machine_mode mode)
8031 {
8032 unsigned int extra;
8033
8034 if (!reg_offset_addressing_ok_p (mode)
8035 || mode_supports_dq_form (mode))
8036 {
8037 if (virtual_stack_registers_memory_p (x))
8038 return x;
8039
8040 /* In theory we should not be seeing addresses of the form reg+0,
8041 but just in case it is generated, optimize it away. */
8042 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8043 return force_reg (Pmode, XEXP (x, 0));
8044
      /* For TImode with load/store quad, restrict addresses to just a single
	 pointer, so it works with both GPRs and VSX registers.  Otherwise,
	 make sure both operands of a PLUS are registers.  */
8048 else if (GET_CODE (x) == PLUS
8049 && (mode != TImode || !TARGET_VSX))
8050 return gen_rtx_PLUS (Pmode,
8051 force_reg (Pmode, XEXP (x, 0)),
8052 force_reg (Pmode, XEXP (x, 1)));
8053 else
8054 return force_reg (Pmode, x);
8055 }
8056 if (SYMBOL_REF_P (x))
8057 {
8058 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8059 if (model != 0)
8060 return rs6000_legitimize_tls_address (x, model);
8061 }
8062
8063 extra = 0;
8064 switch (mode)
8065 {
8066 case E_TFmode:
8067 case E_TDmode:
8068 case E_TImode:
8069 case E_PTImode:
8070 case E_IFmode:
8071 case E_KFmode:
8072 /* As in legitimate_offset_address_p we do not assume
8073 worst-case. The mode here is just a hint as to the registers
8074 used. A TImode is usually in gprs, but may actually be in
8075 fprs. Leave worst-case scenario for reload to handle via
8076 insn constraints. PTImode is only GPRs. */
8077 extra = 8;
8078 break;
8079 default:
8080 break;
8081 }
8082
8083 if (GET_CODE (x) == PLUS
8084 && REG_P (XEXP (x, 0))
8085 && CONST_INT_P (XEXP (x, 1))
8086 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8087 >= 0x10000 - extra))
8088 {
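      /* For example, (reg + 0x12345) is rewritten as an add of 0x10000 into
	 a temporary register followed by the address tmp + 0x2345.  The
	 XOR/subtract idiom below sign-extends the low 16 bits, matching the
	 sign extension the hardware applies to D-form displacements.  */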
8089 HOST_WIDE_INT high_int, low_int;
8090 rtx sum;
8091 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8092 if (low_int >= 0x8000 - extra)
8093 low_int = 0;
8094 high_int = INTVAL (XEXP (x, 1)) - low_int;
8095 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8096 GEN_INT (high_int)), 0);
8097 return plus_constant (Pmode, sum, low_int);
8098 }
8099 else if (GET_CODE (x) == PLUS
8100 && REG_P (XEXP (x, 0))
8101 && !CONST_INT_P (XEXP (x, 1))
8102 && GET_MODE_NUNITS (mode) == 1
8103 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8104 || (/* ??? Assume floating point reg based on mode? */
8105 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8106 && !avoiding_indexed_address_p (mode))
8107 {
8108 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8109 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8110 }
8111 else if ((TARGET_ELF
8112 #if TARGET_MACHO
8113 || !MACHO_DYNAMIC_NO_PIC_P
8114 #endif
8115 )
8116 && TARGET_32BIT
8117 && TARGET_NO_TOC_OR_PCREL
8118 && !flag_pic
8119 && !CONST_INT_P (x)
8120 && !CONST_WIDE_INT_P (x)
8121 && !CONST_DOUBLE_P (x)
8122 && CONSTANT_P (x)
8123 && GET_MODE_NUNITS (mode) == 1
8124 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8125 || (/* ??? Assume floating point reg based on mode? */
8126 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8127 {
8128 rtx reg = gen_reg_rtx (Pmode);
8129 if (TARGET_ELF)
8130 emit_insn (gen_elf_high (reg, x));
8131 else
8132 emit_insn (gen_macho_high (Pmode, reg, x));
8133 return gen_rtx_LO_SUM (Pmode, reg, x);
8134 }
8135 else if (TARGET_TOC
8136 && SYMBOL_REF_P (x)
8137 && constant_pool_expr_p (x)
8138 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8139 return create_TOC_reference (x, NULL_RTX);
8140 else
8141 return x;
8142 }
8143
8144 /* Debug version of rs6000_legitimize_address. */
8145 static rtx
8146 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8147 {
8148 rtx ret;
8149 rtx_insn *insns;
8150
8151 start_sequence ();
8152 ret = rs6000_legitimize_address (x, oldx, mode);
8153 insns = get_insns ();
8154 end_sequence ();
8155
8156 if (ret != x)
8157 {
8158 fprintf (stderr,
8159 "\nrs6000_legitimize_address: mode %s, old code %s, "
8160 "new code %s, modified\n",
8161 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8162 GET_RTX_NAME (GET_CODE (ret)));
8163
8164 fprintf (stderr, "Original address:\n");
8165 debug_rtx (x);
8166
8167 fprintf (stderr, "oldx:\n");
8168 debug_rtx (oldx);
8169
8170 fprintf (stderr, "New address:\n");
8171 debug_rtx (ret);
8172
8173 if (insns)
8174 {
8175 fprintf (stderr, "Insns added:\n");
8176 debug_rtx_list (insns, 20);
8177 }
8178 }
8179 else
8180 {
8181 fprintf (stderr,
8182 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8183 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8184
8185 debug_rtx (x);
8186 }
8187
8188 if (insns)
8189 emit_insn (insns);
8190
8191 return ret;
8192 }
8193
8194 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8195 We need to emit DTP-relative relocations. */
8196
8197 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8198 static void
8199 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8200 {
8201 switch (size)
8202 {
8203 case 4:
8204 fputs ("\t.long\t", file);
8205 break;
8206 case 8:
8207 fputs (DOUBLE_INT_ASM_OP, file);
8208 break;
8209 default:
8210 gcc_unreachable ();
8211 }
8212 output_addr_const (file, x);
8213 if (TARGET_ELF)
8214 fputs ("@dtprel+0x8000", file);
8215 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8216 {
8217 switch (SYMBOL_REF_TLS_MODEL (x))
8218 {
8219 case 0:
8220 break;
8221 case TLS_MODEL_LOCAL_EXEC:
8222 fputs ("@le", file);
8223 break;
8224 case TLS_MODEL_INITIAL_EXEC:
8225 fputs ("@ie", file);
8226 break;
8227 case TLS_MODEL_GLOBAL_DYNAMIC:
8228 case TLS_MODEL_LOCAL_DYNAMIC:
8229 fputs ("@m", file);
8230 break;
8231 default:
8232 gcc_unreachable ();
8233 }
8234 }
8235 }
8236
8237 /* Return true if X is a symbol that refers to real (rather than emulated)
8238 TLS. */
8239
8240 static bool
8241 rs6000_real_tls_symbol_ref_p (rtx x)
8242 {
8243 return (SYMBOL_REF_P (x)
8244 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8245 }
8246
8247 /* In the name of slightly smaller debug output, and to cater to
8248 general assembler lossage, recognize various UNSPEC sequences
8249 and turn them back into a direct symbol reference. */
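/* For example, a TOC reference such as
   (mem (plus (unspec [(symbol_ref "x") (reg 2)] UNSPEC_TOCREL) (const_int 8)))
   is rewritten back to (mem (plus (symbol_ref "x") (const_int 8))) for the
   debug output.  */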
8250
8251 static rtx
8252 rs6000_delegitimize_address (rtx orig_x)
8253 {
8254 rtx x, y, offset;
8255
8256 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8257 orig_x = XVECEXP (orig_x, 0, 0);
8258
8259 orig_x = delegitimize_mem_from_attrs (orig_x);
8260
8261 x = orig_x;
8262 if (MEM_P (x))
8263 x = XEXP (x, 0);
8264
8265 y = x;
8266 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8267 y = XEXP (y, 1);
8268
8269 offset = NULL_RTX;
8270 if (GET_CODE (y) == PLUS
8271 && GET_MODE (y) == Pmode
8272 && CONST_INT_P (XEXP (y, 1)))
8273 {
8274 offset = XEXP (y, 1);
8275 y = XEXP (y, 0);
8276 }
8277
8278 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8279 {
8280 y = XVECEXP (y, 0, 0);
8281
8282 #ifdef HAVE_AS_TLS
8283 /* Do not associate thread-local symbols with the original
8284 constant pool symbol. */
8285 if (TARGET_XCOFF
8286 && SYMBOL_REF_P (y)
8287 && CONSTANT_POOL_ADDRESS_P (y)
8288 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8289 return orig_x;
8290 #endif
8291
8292 if (offset != NULL_RTX)
8293 y = gen_rtx_PLUS (Pmode, y, offset);
8294 if (!MEM_P (orig_x))
8295 return y;
8296 else
8297 return replace_equiv_address_nv (orig_x, y);
8298 }
8299
8300 if (TARGET_MACHO
8301 && GET_CODE (orig_x) == LO_SUM
8302 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8303 {
8304 y = XEXP (XEXP (orig_x, 1), 0);
8305 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8306 return XVECEXP (y, 0, 0);
8307 }
8308
8309 return orig_x;
8310 }
8311
8312 /* Return true if X shouldn't be emitted into the debug info.
8313 The linker doesn't like .toc section references from
8314 .debug_* sections, so reject .toc section symbols. */
8315
8316 static bool
8317 rs6000_const_not_ok_for_debug_p (rtx x)
8318 {
8319 if (GET_CODE (x) == UNSPEC)
8320 return true;
8321 if (SYMBOL_REF_P (x)
8322 && CONSTANT_POOL_ADDRESS_P (x))
8323 {
8324 rtx c = get_pool_constant (x);
8325 machine_mode cmode = get_pool_mode (x);
8326 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8327 return true;
8328 }
8329
8330 return false;
8331 }
8332
8333 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8334
8335 static bool
8336 rs6000_legitimate_combined_insn (rtx_insn *insn)
8337 {
8338 int icode = INSN_CODE (insn);
8339
8340 /* Reject creating doloop insns. Combine should not be allowed
8341 to create these for a number of reasons:
8342 1) In a nested loop, if combine creates one of these in an
8343 outer loop and the register allocator happens to allocate ctr
8344 to the outer loop insn, then the inner loop can't use ctr.
8345 Inner loops ought to be more highly optimized.
8346 2) Combine often wants to create one of these from what was
8347 originally a three insn sequence, first combining the three
8348 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
     allocated ctr, the splitter takes us back to the three insn
8350 sequence. It's better to stop combine at the two insn
8351 sequence.
     3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8353 insns, the register allocator sometimes uses floating point
8354 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8355 jump insn and output reloads are not implemented for jumps,
8356 the ctrsi/ctrdi splitters need to handle all possible cases.
8357 That's a pain, and it gets to be seriously difficult when a
8358 splitter that runs after reload needs memory to transfer from
8359 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8360 for the difficult case. It's better to not create problems
8361 in the first place. */
8362 if (icode != CODE_FOR_nothing
8363 && (icode == CODE_FOR_bdz_si
8364 || icode == CODE_FOR_bdz_di
8365 || icode == CODE_FOR_bdnz_si
8366 || icode == CODE_FOR_bdnz_di
8367 || icode == CODE_FOR_bdztf_si
8368 || icode == CODE_FOR_bdztf_di
8369 || icode == CODE_FOR_bdnztf_si
8370 || icode == CODE_FOR_bdnztf_di))
8371 return false;
8372
8373 return true;
8374 }
8375
8376 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8377
8378 static GTY(()) rtx rs6000_tls_symbol;
8379 static rtx
8380 rs6000_tls_get_addr (void)
8381 {
8382 if (!rs6000_tls_symbol)
8383 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8384
8385 return rs6000_tls_symbol;
8386 }
8387
8388 /* Construct the SYMBOL_REF for TLS GOT references. */
8389
8390 static GTY(()) rtx rs6000_got_symbol;
8391 rtx
8392 rs6000_got_sym (void)
8393 {
8394 if (!rs6000_got_symbol)
8395 {
8396 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8397 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8398 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8399 }
8400
8401 return rs6000_got_symbol;
8402 }
8403
8404 /* AIX Thread-Local Address support. */
8405
8406 static rtx
8407 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8408 {
8409 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8410 const char *name;
8411 char *tlsname;
8412
8413 name = XSTR (addr, 0);
  /* Append the TLS CSECT qualifier, unless the symbol is already qualified
     or will be placed in the TLS private data section.  */
8416 if (name[strlen (name) - 1] != ']'
8417 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8418 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8419 {
8420 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8421 strcpy (tlsname, name);
8422 strcat (tlsname,
8423 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8424 tlsaddr = copy_rtx (addr);
8425 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8426 }
8427 else
8428 tlsaddr = addr;
8429
8430 /* Place addr into TOC constant pool. */
8431 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8432
8433 /* Output the TOC entry and create the MEM referencing the value. */
8434 if (constant_pool_expr_p (XEXP (sym, 0))
8435 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8436 {
8437 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8438 mem = gen_const_mem (Pmode, tocref);
8439 set_mem_alias_set (mem, get_TOC_alias_set ());
8440 }
8441 else
8442 return sym;
8443
8444 /* Use global-dynamic for local-dynamic. */
8445 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8446 || model == TLS_MODEL_LOCAL_DYNAMIC)
8447 {
8448 /* Create new TOC reference for @m symbol. */
8449 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8450 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8451 strcpy (tlsname, "*LCM");
8452 strcat (tlsname, name + 3);
8453 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8454 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8455 tocref = create_TOC_reference (modaddr, NULL_RTX);
8456 rtx modmem = gen_const_mem (Pmode, tocref);
8457 set_mem_alias_set (modmem, get_TOC_alias_set ());
8458
8459 rtx modreg = gen_reg_rtx (Pmode);
8460 emit_insn (gen_rtx_SET (modreg, modmem));
8461
8462 tmpreg = gen_reg_rtx (Pmode);
8463 emit_insn (gen_rtx_SET (tmpreg, mem));
8464
8465 dest = gen_reg_rtx (Pmode);
8466 if (TARGET_32BIT)
8467 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8468 else
8469 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8470 return dest;
8471 }
8472 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8473 else if (TARGET_32BIT)
8474 {
8475 tlsreg = gen_reg_rtx (SImode);
8476 emit_insn (gen_tls_get_tpointer (tlsreg));
8477 }
8478 else
8479 tlsreg = gen_rtx_REG (DImode, 13);
8480
8481 /* Load the TOC value into temporary register. */
8482 tmpreg = gen_reg_rtx (Pmode);
8483 emit_insn (gen_rtx_SET (tmpreg, mem));
8484 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8485 gen_rtx_MINUS (Pmode, addr, tlsreg));
8486
8487 /* Add TOC symbol value to TLS pointer. */
8488 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8489
8490 return dest;
8491 }
8492
8493 /* Passes the tls arg value for global dynamic and local dynamic
8494 emit_library_call_value in rs6000_legitimize_tls_address to
8495 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8496 marker relocs put on __tls_get_addr calls. */
8497 static rtx global_tlsarg;
8498
8499 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8500 this (thread-local) address. */
8501
8502 static rtx
8503 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8504 {
8505 rtx dest, insn;
8506
8507 if (TARGET_XCOFF)
8508 return rs6000_legitimize_tls_address_aix (addr, model);
8509
8510 dest = gen_reg_rtx (Pmode);
8511 if (model == TLS_MODEL_LOCAL_EXEC
8512 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
8513 {
8514 rtx tlsreg;
8515
8516 if (TARGET_64BIT)
8517 {
8518 tlsreg = gen_rtx_REG (Pmode, 13);
8519 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8520 }
8521 else
8522 {
8523 tlsreg = gen_rtx_REG (Pmode, 2);
8524 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8525 }
8526 emit_insn (insn);
8527 }
8528 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8529 {
8530 rtx tlsreg, tmp;
8531
8532 tmp = gen_reg_rtx (Pmode);
8533 if (TARGET_64BIT)
8534 {
8535 tlsreg = gen_rtx_REG (Pmode, 13);
8536 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8537 }
8538 else
8539 {
8540 tlsreg = gen_rtx_REG (Pmode, 2);
8541 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8542 }
8543 emit_insn (insn);
8544 if (TARGET_64BIT)
8545 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8546 else
8547 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8548 emit_insn (insn);
8549 }
8550 else
8551 {
8552 rtx got, tga, tmp1, tmp2;
8553
8554 /* We currently use relocations like @got@tlsgd for tls, which
8555 means the linker will handle allocation of tls entries, placing
8556 them in the .got section. So use a pointer to the .got section,
8557 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8558 or to secondary GOT sections used by 32-bit -fPIC. */
8559 if (rs6000_pcrel_p (cfun))
8560 got = const0_rtx;
8561 else if (TARGET_64BIT)
8562 got = gen_rtx_REG (Pmode, 2);
8563 else
8564 {
8565 if (flag_pic == 1)
8566 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8567 else
8568 {
8569 rtx gsym = rs6000_got_sym ();
8570 got = gen_reg_rtx (Pmode);
8571 if (flag_pic == 0)
8572 rs6000_emit_move (got, gsym, Pmode);
8573 else
8574 {
8575 rtx mem, lab;
8576
8577 tmp1 = gen_reg_rtx (Pmode);
8578 tmp2 = gen_reg_rtx (Pmode);
8579 mem = gen_const_mem (Pmode, tmp1);
8580 lab = gen_label_rtx ();
8581 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8582 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8583 if (TARGET_LINK_STACK)
8584 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8585 emit_move_insn (tmp2, mem);
8586 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8587 set_unique_reg_note (last, REG_EQUAL, gsym);
8588 }
8589 }
8590 }
8591
8592 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8593 {
8594 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8595 UNSPEC_TLSGD);
8596 tga = rs6000_tls_get_addr ();
8597 rtx argreg = gen_rtx_REG (Pmode, 3);
8598 emit_insn (gen_rtx_SET (argreg, arg));
8599 global_tlsarg = arg;
8600 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8601 global_tlsarg = NULL_RTX;
8602
8603 /* Make a note so that the result of this call can be CSEd. */
8604 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8605 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8606 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8607 }
8608 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8609 {
8610 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8611 tga = rs6000_tls_get_addr ();
8612 tmp1 = gen_reg_rtx (Pmode);
8613 rtx argreg = gen_rtx_REG (Pmode, 3);
8614 emit_insn (gen_rtx_SET (argreg, arg));
8615 global_tlsarg = arg;
8616 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8617 global_tlsarg = NULL_RTX;
8618
8619 /* Make a note so that the result of this call can be CSEd. */
8620 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8621 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8622 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8623
8624 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
8625 {
8626 if (TARGET_64BIT)
8627 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8628 else
8629 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8630 }
8631 else if (rs6000_tls_size == 32)
8632 {
8633 tmp2 = gen_reg_rtx (Pmode);
8634 if (TARGET_64BIT)
8635 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8636 else
8637 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8638 emit_insn (insn);
8639 if (TARGET_64BIT)
8640 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8641 else
8642 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8643 }
8644 else
8645 {
8646 tmp2 = gen_reg_rtx (Pmode);
8647 if (TARGET_64BIT)
8648 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8649 else
8650 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8651 emit_insn (insn);
8652 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8653 }
8654 emit_insn (insn);
8655 }
8656 else
8657 {
8658 /* IE, or 64-bit offset LE. */
8659 tmp2 = gen_reg_rtx (Pmode);
8660 if (TARGET_64BIT)
8661 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8662 else
8663 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8664 emit_insn (insn);
8665 if (rs6000_pcrel_p (cfun))
8666 {
8667 if (TARGET_64BIT)
8668 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8669 else
8670 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8671 }
8672 else if (TARGET_64BIT)
8673 insn = gen_tls_tls_64 (dest, tmp2, addr);
8674 else
8675 insn = gen_tls_tls_32 (dest, tmp2, addr);
8676 emit_insn (insn);
8677 }
8678 }
8679
8680 return dest;
8681 }
8682
8683 /* Only create the global variable for the stack protect guard if we are using
8684 the global flavor of that guard. */
8685 static tree
8686 rs6000_init_stack_protect_guard (void)
8687 {
8688 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8689 return default_stack_protect_guard ();
8690
8691 return NULL_TREE;
8692 }
8693
8694 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8695
8696 static bool
8697 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8698 {
8699 if (GET_CODE (x) == HIGH
8700 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8701 return true;
8702
8703 /* A TLS symbol in the TOC cannot contain a sum. */
8704 if (GET_CODE (x) == CONST
8705 && GET_CODE (XEXP (x, 0)) == PLUS
8706 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8707 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8708 return true;
8709
8710 /* Do not place an ELF TLS symbol in the constant pool. */
8711 return TARGET_ELF && tls_referenced_p (x);
8712 }
8713
8714 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8715 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8716 can be addressed relative to the toc pointer. */
8717
8718 static bool
8719 use_toc_relative_ref (rtx sym, machine_mode mode)
8720 {
8721 return ((constant_pool_expr_p (sym)
8722 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8723 get_pool_mode (sym)))
8724 || (TARGET_CMODEL == CMODEL_MEDIUM
8725 && SYMBOL_REF_LOCAL_P (sym)
8726 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8727 }
8728
8729 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8730 that is a valid memory address for an instruction.
8731 The MODE argument is the machine mode for the MEM expression
8732 that wants to use this address.
8733
8734    On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8735    refers to a constant pool entry of an address (or the sum of it
8736    plus a constant), a short (16-bit signed) constant plus a register,
8737    the sum of two registers, or a register indirect, possibly with an
8738    auto-increment.  For DFmode, DDmode and DImode with a constant plus
8739    register, we must ensure that both words are addressable, or on
8740    PowerPC64 that the offset is word aligned.
8741
8742 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8743 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8744 because adjacent memory cells are accessed by adding word-sized offsets
8745 during assembly output. */
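/* Illustrative RTL sketches of the four forms described above (added for
   exposition, not from the original sources, and not exhaustive; what is
   actually accepted also depends on MODE and the target flags tested
   below):

     (reg:DI 3)                           -- register indirect
     (plus:DI (reg:DI 3) (const_int 8))   -- register + 16-bit offset
     (plus:DI (reg:DI 3) (reg:DI 4))      -- indexed (reg + reg)
     (pre_inc:DI (reg:DI 3))              -- auto-increment  */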
8746 static bool
8747 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8748 {
8749 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8750 bool quad_offset_p = mode_supports_dq_form (mode);
8751
8752   /* If this is an unaligned lvx/stvx type address, discard the outer AND.  */
8753 if (VECTOR_MEM_ALTIVEC_P (mode)
8754 && GET_CODE (x) == AND
8755 && CONST_INT_P (XEXP (x, 1))
8756 && INTVAL (XEXP (x, 1)) == -16)
8757 x = XEXP (x, 0);
8758
8759 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8760 return 0;
8761 if (legitimate_indirect_address_p (x, reg_ok_strict))
8762 return 1;
8763 if (TARGET_UPDATE
8764 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8765 && mode_supports_pre_incdec_p (mode)
8766 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8767 return 1;
8768
8769 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8770 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
8771 return 1;
8772
8773 /* Handle restricted vector d-form offsets in ISA 3.0. */
8774 if (quad_offset_p)
8775 {
8776 if (quad_address_p (x, mode, reg_ok_strict))
8777 return 1;
8778 }
8779 else if (virtual_stack_registers_memory_p (x))
8780 return 1;
8781
8782 else if (reg_offset_p)
8783 {
8784 if (legitimate_small_data_p (mode, x))
8785 return 1;
8786 if (legitimate_constant_pool_address_p (x, mode,
8787 reg_ok_strict || lra_in_progress))
8788 return 1;
8789 }
8790
8791 /* For TImode, if we have TImode in VSX registers, only allow register
8792 indirect addresses. This will allow the values to go in either GPRs
8793 or VSX registers without reloading. The vector types would tend to
8794 go into VSX registers, so we allow REG+REG, while TImode seems
8795 somewhat split, in that some uses are GPR based, and some VSX based. */
8796 /* FIXME: We could loosen this by changing the following to
8797 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8798 but currently we cannot allow REG+REG addressing for TImode. See
8799 PR72827 for complete details on how this ends up hoodwinking DSE. */
8800 if (mode == TImode && TARGET_VSX)
8801 return 0;
8802 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8803 if (! reg_ok_strict
8804 && reg_offset_p
8805 && GET_CODE (x) == PLUS
8806 && REG_P (XEXP (x, 0))
8807 && (XEXP (x, 0) == virtual_stack_vars_rtx
8808 || XEXP (x, 0) == arg_pointer_rtx)
8809 && CONST_INT_P (XEXP (x, 1)))
8810 return 1;
8811 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8812 return 1;
8813 if (!FLOAT128_2REG_P (mode)
8814 && (TARGET_HARD_FLOAT
8815 || TARGET_POWERPC64
8816 || (mode != DFmode && mode != DDmode))
8817 && (TARGET_POWERPC64 || mode != DImode)
8818 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8819 && mode != PTImode
8820 && !avoiding_indexed_address_p (mode)
8821 && legitimate_indexed_address_p (x, reg_ok_strict))
8822 return 1;
8823 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8824 && mode_supports_pre_modify_p (mode)
8825 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8826 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8827 reg_ok_strict, false)
8828 || (!avoiding_indexed_address_p (mode)
8829 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8830 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8831 {
8832 /* There is no prefixed version of the load/store with update. */
8833 rtx addr = XEXP (x, 1);
8834 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
8835 }
8836 if (reg_offset_p && !quad_offset_p
8837 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8838 return 1;
8839 return 0;
8840 }
8841
8842 /* Debug version of rs6000_legitimate_address_p. */
8843 static bool
8844 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8845 bool reg_ok_strict)
8846 {
8847 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8848 fprintf (stderr,
8849 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8850 "strict = %d, reload = %s, code = %s\n",
8851 ret ? "true" : "false",
8852 GET_MODE_NAME (mode),
8853 reg_ok_strict,
8854 (reload_completed ? "after" : "before"),
8855 GET_RTX_NAME (GET_CODE (x)));
8856 debug_rtx (x);
8857
8858 return ret;
8859 }
8860
8861 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8862
8863 static bool
8864 rs6000_mode_dependent_address_p (const_rtx addr,
8865 addr_space_t as ATTRIBUTE_UNUSED)
8866 {
8867 return rs6000_mode_dependent_address_ptr (addr);
8868 }
8869
8870 /* Go to LABEL if ADDR (a legitimate address expression)
8871 has an effect that depends on the machine mode it is used for.
8872
8873    On the RS/6000 this is true of all integral offsets (since AltiVec
8874    and VSX modes don't allow them) and of pre-increment and decrement addresses.
8875
8876 ??? Except that due to conceptual problems in offsettable_address_p
8877 we can't really report the problems of integral offsets. So leave
8878 this assuming that the adjustable offset must be valid for the
8879 sub-words of a TFmode operand, which is what we had before. */
8880
8881 static bool
8882 rs6000_mode_dependent_address (const_rtx addr)
8883 {
8884 switch (GET_CODE (addr))
8885 {
8886 case PLUS:
8887 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8888 is considered a legitimate address before reload, so there
8889 are no offset restrictions in that case. Note that this
8890 condition is safe in strict mode because any address involving
8891 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8892 been rejected as illegitimate. */
8893 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8894 && XEXP (addr, 0) != arg_pointer_rtx
8895 && CONST_INT_P (XEXP (addr, 1)))
8896 {
8897 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8898 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
8899 if (TARGET_PREFIXED_ADDR)
8900 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
8901 else
8902 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
8903 }
8904 break;
8905
8906 case LO_SUM:
8907 /* Anything in the constant pool is sufficiently aligned that
8908 all bytes have the same high part address. */
8909 return !legitimate_constant_pool_address_p (addr, QImode, false);
8910
8911 /* Auto-increment cases are now treated generically in recog.c. */
8912 case PRE_MODIFY:
8913 return TARGET_UPDATE;
8914
8915 /* AND is only allowed in Altivec loads. */
8916 case AND:
8917 return true;
8918
8919 default:
8920 break;
8921 }
8922
8923 return false;
8924 }
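/* Illustrative example (an assumption based on the check above, not taken
   from the original comments): without prefixed addressing,
   (plus (reg) (const_int 32766)) is mode-dependent, since reaching the
   last word of a multi-word operand needs offset 32766 + extra, which no
   longer fits in a signed 16-bit displacement.  */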
8925
8926 /* Debug version of rs6000_mode_dependent_address. */
8927 static bool
8928 rs6000_debug_mode_dependent_address (const_rtx addr)
8929 {
8930 bool ret = rs6000_mode_dependent_address (addr);
8931
8932 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8933 ret ? "true" : "false");
8934 debug_rtx (addr);
8935
8936 return ret;
8937 }
8938
8939 /* Implement FIND_BASE_TERM. */
8940
8941 rtx
8942 rs6000_find_base_term (rtx op)
8943 {
8944 rtx base;
8945
8946 base = op;
8947 if (GET_CODE (base) == CONST)
8948 base = XEXP (base, 0);
8949 if (GET_CODE (base) == PLUS)
8950 base = XEXP (base, 0);
8951 if (GET_CODE (base) == UNSPEC)
8952 switch (XINT (base, 1))
8953 {
8954 case UNSPEC_TOCREL:
8955 case UNSPEC_MACHOPIC_OFFSET:
8956 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8957 for aliasing purposes. */
8958 return XVECEXP (base, 0, 0);
8959 }
8960
8961 return op;
8962 }
8963
8964 /* More elaborate version of recog's offsettable_memref_p predicate
8965 that works around the ??? note of rs6000_mode_dependent_address.
8966 In particular it accepts
8967
8968 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8969
8970    in 32-bit mode, which the recog predicate rejects.  */
8971
8972 static bool
8973 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
8974 {
8975 bool worst_case;
8976
8977 if (!MEM_P (op))
8978 return false;
8979
8980 /* First mimic offsettable_memref_p. */
8981 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
8982 return true;
8983
8984 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8985 the latter predicate knows nothing about the mode of the memory
8986 reference and, therefore, assumes that it is the largest supported
8987 mode (TFmode). As a consequence, legitimate offsettable memory
8988 references are rejected. rs6000_legitimate_offset_address_p contains
8989 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8990 at least with a little bit of help here given that we know the
8991 actual registers used. */
8992 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8993 || GET_MODE_SIZE (reg_mode) == 4);
8994 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8995 strict, worst_case);
8996 }
8997
8998 /* Determine the reassociation width to be used in reassociate_bb.
8999 This takes into account how many parallel operations we
9000 can actually do of a given type, and also the latency.
9001 P8:
9002 int add/sub 6/cycle
9003 mul 2/cycle
9004 vect add/sub/mul 2/cycle
9005 fp add/sub/mul 2/cycle
9006 dfp 1/cycle
9007 */
9008
9009 static int
9010 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9011 machine_mode mode)
9012 {
9013 switch (rs6000_tune)
9014 {
9015 case PROCESSOR_POWER8:
9016 case PROCESSOR_POWER9:
9017 case PROCESSOR_FUTURE:
9018 if (DECIMAL_FLOAT_MODE_P (mode))
9019 return 1;
9020 if (VECTOR_MODE_P (mode))
9021 return 4;
9022 if (INTEGRAL_MODE_P (mode))
9023 return 1;
9024 if (FLOAT_MODE_P (mode))
9025 return 4;
9026 break;
9027 default:
9028 break;
9029 }
9030 return 1;
9031 }
9032
9033 /* Change register usage conditional on target flags. */
9034 static void
9035 rs6000_conditional_register_usage (void)
9036 {
9037 int i;
9038
9039 if (TARGET_DEBUG_TARGET)
9040 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9041
9042 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9043 if (TARGET_64BIT)
9044 fixed_regs[13] = call_used_regs[13] = 1;
9045
9046 /* Conditionally disable FPRs. */
9047 if (TARGET_SOFT_FLOAT)
9048 for (i = 32; i < 64; i++)
9049 fixed_regs[i] = call_used_regs[i] = 1;
9050
9051 /* The TOC register is not killed across calls in a way that is
9052 visible to the compiler. */
9053 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9054 call_used_regs[2] = 0;
9055
9056 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9057 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9058
9059 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9060 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9061 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9062
9063 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9064 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9065 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9066
9067 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9068 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9069
9070 if (!TARGET_ALTIVEC && !TARGET_VSX)
9071 {
9072 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9073 fixed_regs[i] = call_used_regs[i] = 1;
9074 call_used_regs[VRSAVE_REGNO] = 1;
9075 }
9076
9077 if (TARGET_ALTIVEC || TARGET_VSX)
9078 global_regs[VSCR_REGNO] = 1;
9079
9080 if (TARGET_ALTIVEC_ABI)
9081 {
9082 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9083 call_used_regs[i] = 1;
9084
9085 /* AIX reserves VR20:31 in non-extended ABI mode. */
9086 if (TARGET_XCOFF)
9087 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9088 fixed_regs[i] = call_used_regs[i] = 1;
9089 }
9090 }
9091
9092 \f
9093 /* Output insns to set DEST equal to the constant SOURCE as a series of
9094 lis, ori and shl instructions and return TRUE. */
9095
9096 bool
9097 rs6000_emit_set_const (rtx dest, rtx source)
9098 {
9099 machine_mode mode = GET_MODE (dest);
9100 rtx temp, set;
9101 rtx_insn *insn;
9102 HOST_WIDE_INT c;
9103
9104 gcc_checking_assert (CONST_INT_P (source));
9105 c = INTVAL (source);
9106 switch (mode)
9107 {
9108 case E_QImode:
9109 case E_HImode:
9110 emit_insn (gen_rtx_SET (dest, source));
9111 return true;
9112
9113 case E_SImode:
9114 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9115
9116 emit_insn (gen_rtx_SET (copy_rtx (temp),
9117 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9118 emit_insn (gen_rtx_SET (dest,
9119 gen_rtx_IOR (SImode, copy_rtx (temp),
9120 GEN_INT (c & 0xffff))));
9121 break;
9122
9123 case E_DImode:
9124 if (!TARGET_POWERPC64)
9125 {
9126 rtx hi, lo;
9127
9128 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9129 DImode);
9130 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9131 DImode);
9132 emit_move_insn (hi, GEN_INT (c >> 32));
9133 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9134 emit_move_insn (lo, GEN_INT (c));
9135 }
9136 else
9137 rs6000_emit_set_long_const (dest, c);
9138 break;
9139
9140 default:
9141 gcc_unreachable ();
9142 }
9143
9144 insn = get_last_insn ();
9145 set = single_set (insn);
9146 if (! CONSTANT_P (SET_SRC (set)))
9147 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9148
9149 return true;
9150 }
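/* Illustrative example for the SImode case above (assuming standard
   mnemonics): c = 0x12345678 is built as "lis rT,0x1234" (the high
   16 bits) followed by "ori rD,rT,0x5678" (the IOR of the low 16).  */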
9151
9152 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9153 Output insns to set DEST equal to the constant C as a series of
9154 lis, ori and shl instructions. */
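/* Illustrative worked example (assuming standard mnemonics): for
   c = 0x123456789abcdef0 the fields are ud4 = 0x1234, ud3 = 0x5678,
   ud2 = 0x9abc and ud1 = 0xdef0, so the general case below emits

     lis   rT,0x1234       # rT = 0x1234 << 16
     ori   rT,rT,0x5678    # rT = 0x12345678
     sldi  rT,rT,32        # shift into the high doubleword
     oris  rT,rT,0x9abc    # IOR in ud2 << 16
     ori   rD,rT,0xdef0    # IOR in ud1  */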
9155
9156 static void
9157 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9158 {
9159 rtx temp;
9160 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9161
9162 ud1 = c & 0xffff;
9163 c = c >> 16;
9164 ud2 = c & 0xffff;
9165 c = c >> 16;
9166 ud3 = c & 0xffff;
9167 c = c >> 16;
9168 ud4 = c & 0xffff;
9169
9170 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9171 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9172 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9173
9174 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9175 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9176 {
9177 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9178
9179 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9180 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9181 if (ud1 != 0)
9182 emit_move_insn (dest,
9183 gen_rtx_IOR (DImode, copy_rtx (temp),
9184 GEN_INT (ud1)));
9185 }
9186 else if (ud3 == 0 && ud4 == 0)
9187 {
9188 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9189
9190 gcc_assert (ud2 & 0x8000);
9191 emit_move_insn (copy_rtx (temp),
9192 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9193 if (ud1 != 0)
9194 emit_move_insn (copy_rtx (temp),
9195 gen_rtx_IOR (DImode, copy_rtx (temp),
9196 GEN_INT (ud1)));
9197 emit_move_insn (dest,
9198 gen_rtx_ZERO_EXTEND (DImode,
9199 gen_lowpart (SImode,
9200 copy_rtx (temp))));
9201 }
9202 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9203 || (ud4 == 0 && ! (ud3 & 0x8000)))
9204 {
9205 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9206
9207 emit_move_insn (copy_rtx (temp),
9208 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9209 if (ud2 != 0)
9210 emit_move_insn (copy_rtx (temp),
9211 gen_rtx_IOR (DImode, copy_rtx (temp),
9212 GEN_INT (ud2)));
9213 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9214 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9215 GEN_INT (16)));
9216 if (ud1 != 0)
9217 emit_move_insn (dest,
9218 gen_rtx_IOR (DImode, copy_rtx (temp),
9219 GEN_INT (ud1)));
9220 }
9221 else
9222 {
9223 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9224
9225 emit_move_insn (copy_rtx (temp),
9226 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9227 if (ud3 != 0)
9228 emit_move_insn (copy_rtx (temp),
9229 gen_rtx_IOR (DImode, copy_rtx (temp),
9230 GEN_INT (ud3)));
9231
9232 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9233 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9234 GEN_INT (32)));
9235 if (ud2 != 0)
9236 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9237 gen_rtx_IOR (DImode, copy_rtx (temp),
9238 GEN_INT (ud2 << 16)));
9239 if (ud1 != 0)
9240 emit_move_insn (dest,
9241 gen_rtx_IOR (DImode, copy_rtx (temp),
9242 GEN_INT (ud1)));
9243 }
9244 }
9245
9246 /* Helper for the following. Get rid of [r+r] memory refs
9247 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9248
9249 static void
9250 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9251 {
9252 if (MEM_P (operands[0])
9253 && !REG_P (XEXP (operands[0], 0))
9254 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9255 GET_MODE (operands[0]), false))
9256 operands[0]
9257 = replace_equiv_address (operands[0],
9258 copy_addr_to_reg (XEXP (operands[0], 0)));
9259
9260 if (MEM_P (operands[1])
9261 && !REG_P (XEXP (operands[1], 0))
9262 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9263 GET_MODE (operands[1]), false))
9264 operands[1]
9265 = replace_equiv_address (operands[1],
9266 copy_addr_to_reg (XEXP (operands[1], 0)));
9267 }
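/* Illustrative example (an assumption, not from the original comments):
   an operand such as (mem:TI (plus:DI (reg:DI 3) (reg:DI 4))) is
   rewritten as (mem:TI (reg:DI tmp)) after emitting tmp = r3 + r4, since
   the multi-insn moves for these modes add word-sized offsets to the
   address and so cannot keep an indexed form.  */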
9268
9269 /* Generate a vector of constants to permute MODE for a little-endian
9270 storage operation by swapping the two halves of a vector. */
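/* For example, for V4SImode this yields the selector { 2, 3, 0, 1 }, and
   for V16QImode { 8, ..., 15, 0, ..., 7 }; in both cases the two
   doubleword halves exchange places.  */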
9271 static rtvec
9272 rs6000_const_vec (machine_mode mode)
9273 {
9274 int i, subparts;
9275 rtvec v;
9276
9277 switch (mode)
9278 {
9279 case E_V1TImode:
9280 subparts = 1;
9281 break;
9282 case E_V2DFmode:
9283 case E_V2DImode:
9284 subparts = 2;
9285 break;
9286 case E_V4SFmode:
9287 case E_V4SImode:
9288 subparts = 4;
9289 break;
9290 case E_V8HImode:
9291 subparts = 8;
9292 break;
9293 case E_V16QImode:
9294 subparts = 16;
9295 break;
9296 default:
9297       gcc_unreachable ();
9298 }
9299
9300 v = rtvec_alloc (subparts);
9301
9302 for (i = 0; i < subparts / 2; ++i)
9303 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9304 for (i = subparts / 2; i < subparts; ++i)
9305 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9306
9307 return v;
9308 }
9309
9310 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9311 store operation. */
9312 void
9313 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9314 {
9315   /* Scalar permutations are easier to express in integer modes than in
9316      floating-point modes, so cast them here.  We use V1TImode instead
9317 of TImode to ensure that the values don't go through GPRs. */
9318 if (FLOAT128_VECTOR_P (mode))
9319 {
9320 dest = gen_lowpart (V1TImode, dest);
9321 source = gen_lowpart (V1TImode, source);
9322 mode = V1TImode;
9323 }
9324
9325 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9326 scalar. */
9327 if (mode == TImode || mode == V1TImode)
9328 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9329 GEN_INT (64))));
9330 else
9331 {
9332 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9333 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9334 }
9335 }
9336
9337 /* Emit a little-endian load from vector memory location SOURCE to VSX
9338 register DEST in mode MODE. The load is done with two permuting
9339    insns that represent an lxvd2x and an xxpermdi.  */
9340 void
9341 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9342 {
9343   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9344 V1TImode). */
9345 if (mode == TImode || mode == V1TImode)
9346 {
9347 mode = V2DImode;
9348 dest = gen_lowpart (V2DImode, dest);
9349 source = adjust_address (source, V2DImode, 0);
9350 }
9351
9352 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9353 rs6000_emit_le_vsx_permute (tmp, source, mode);
9354 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9355 }
9356
9357 /* Emit a little-endian store to vector memory location DEST from VSX
9358 register SOURCE in mode MODE. The store is done with two permuting
9359    insns that represent an xxpermdi and an stxvd2x.  */
9360 void
9361 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9362 {
9363 /* This should never be called during or after LRA, because it does
9364 not re-permute the source register. It is intended only for use
9365 during expand. */
9366 gcc_assert (!lra_in_progress && !reload_completed);
9367
9368 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9369 V1TImode). */
9370 if (mode == TImode || mode == V1TImode)
9371 {
9372 mode = V2DImode;
9373 dest = adjust_address (dest, V2DImode, 0);
9374 source = gen_lowpart (V2DImode, source);
9375 }
9376
9377 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9378 rs6000_emit_le_vsx_permute (tmp, source, mode);
9379 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9380 }
9381
9382 /* Emit a sequence representing a little-endian VSX load or store,
9383 moving data from SOURCE to DEST in mode MODE. This is done
9384 separately from rs6000_emit_move to ensure it is called only
9385 during expand. LE VSX loads and stores introduced later are
9386 handled with a split. The expand-time RTL generation allows
9387 us to optimize away redundant pairs of register-permutes. */
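/* Illustrative consequence (a sketch of the intent, not a guarantee): a
   little-endian VSX load expands to an lxvd2x plus an xxpermdi that swaps
   the doublewords, and a store to the mirror pair; when a loaded value is
   stored again unchanged, the two inner permutes are inverses of each
   other and can be deleted as a redundant pair.  */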
9388 void
9389 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9390 {
9391 gcc_assert (!BYTES_BIG_ENDIAN
9392 && VECTOR_MEM_VSX_P (mode)
9393 && !TARGET_P9_VECTOR
9394 && !gpr_or_gpr_p (dest, source)
9395 && (MEM_P (source) ^ MEM_P (dest)));
9396
9397 if (MEM_P (source))
9398 {
9399 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9400 rs6000_emit_le_vsx_load (dest, source, mode);
9401 }
9402 else
9403 {
9404 if (!REG_P (source))
9405 source = force_reg (mode, source);
9406 rs6000_emit_le_vsx_store (dest, source, mode);
9407 }
9408 }
9409
9410 /* Return whether an SFmode or SImode move can be done without converting
9411    one mode to another.  This arises when we have:
9412
9413 (SUBREG:SF (REG:SI ...))
9414 (SUBREG:SI (REG:SF ...))
9415
9416 and one of the values is in a floating point/vector register, where SFmode
9417 scalars are stored in DFmode format. */
9418
9419 bool
9420 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9421 {
9422 if (TARGET_ALLOW_SF_SUBREG)
9423 return true;
9424
9425 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9426 return true;
9427
9428 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9429 return true;
9430
9431   /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
9432 if (SUBREG_P (dest))
9433 {
9434 rtx dest_subreg = SUBREG_REG (dest);
9435 rtx src_subreg = SUBREG_REG (src);
9436 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9437 }
9438
9439 return false;
9440 }
9441
9442
9443 /* Helper function to change moves with:
9444
9445 (SUBREG:SF (REG:SI)) and
9446 (SUBREG:SI (REG:SF))
9447
9448 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9449 values are stored as DFmode values in the VSX registers. We need to convert
9450 the bits before we can use a direct move or operate on the bits in the
9451 vector register as an integer type.
9452
9453    Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))).  */
9454
9455 static bool
9456 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9457 {
9458 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9459 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9460 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9461 {
9462 rtx inner_source = SUBREG_REG (source);
9463 machine_mode inner_mode = GET_MODE (inner_source);
9464
9465 if (mode == SImode && inner_mode == SFmode)
9466 {
9467 emit_insn (gen_movsi_from_sf (dest, inner_source));
9468 return true;
9469 }
9470
9471 if (mode == SFmode && inner_mode == SImode)
9472 {
9473 emit_insn (gen_movsf_from_si (dest, inner_source));
9474 return true;
9475 }
9476 }
9477
9478 return false;
9479 }
9480
9481 /* Emit a move from SOURCE to DEST in mode MODE. */
9482 void
9483 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9484 {
9485 rtx operands[2];
9486 operands[0] = dest;
9487 operands[1] = source;
9488
9489 if (TARGET_DEBUG_ADDR)
9490 {
9491 fprintf (stderr,
9492 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9493 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9494 GET_MODE_NAME (mode),
9495 lra_in_progress,
9496 reload_completed,
9497 can_create_pseudo_p ());
9498 debug_rtx (dest);
9499 fprintf (stderr, "source:\n");
9500 debug_rtx (source);
9501 }
9502
9503 /* Check that we get CONST_WIDE_INT only when we should. */
9504 if (CONST_WIDE_INT_P (operands[1])
9505 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9506 gcc_unreachable ();
9507
9508 #ifdef HAVE_AS_GNU_ATTRIBUTE
9509 /* If we use a long double type, set the flags in .gnu_attribute that say
9510 what the long double type is. This is to allow the linker's warning
9511 message for the wrong long double to be useful, even if the function does
9512      not do a call (for example, doing a 128-bit add on power9 if the long
9513      double type is IEEE 128-bit).  Do not set this if __ibm128 or __float128
9514      are used and they aren't the default long double type.  */
9515 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9516 {
9517 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9518 rs6000_passes_float = rs6000_passes_long_double = true;
9519
9520 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9521 rs6000_passes_float = rs6000_passes_long_double = true;
9522 }
9523 #endif
9524
9525 /* See if we need to special case SImode/SFmode SUBREG moves. */
9526 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9527 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9528 return;
9529
9530 /* Check if GCC is setting up a block move that will end up using FP
9531 registers as temporaries. We must make sure this is acceptable. */
9532 if (MEM_P (operands[0])
9533 && MEM_P (operands[1])
9534 && mode == DImode
9535 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9536 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9537 && ! (rs6000_slow_unaligned_access (SImode,
9538 (MEM_ALIGN (operands[0]) > 32
9539 ? 32 : MEM_ALIGN (operands[0])))
9540 || rs6000_slow_unaligned_access (SImode,
9541 (MEM_ALIGN (operands[1]) > 32
9542 ? 32 : MEM_ALIGN (operands[1]))))
9543 && ! MEM_VOLATILE_P (operands [0])
9544 && ! MEM_VOLATILE_P (operands [1]))
9545 {
9546 emit_move_insn (adjust_address (operands[0], SImode, 0),
9547 adjust_address (operands[1], SImode, 0));
9548 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9549 adjust_address (copy_rtx (operands[1]), SImode, 4));
9550 return;
9551 }
9552
9553 if (can_create_pseudo_p () && MEM_P (operands[0])
9554 && !gpc_reg_operand (operands[1], mode))
9555 operands[1] = force_reg (mode, operands[1]);
9556
9557 /* Recognize the case where operand[1] is a reference to thread-local
9558 data and load its address to a register. */
9559 if (tls_referenced_p (operands[1]))
9560 {
9561 enum tls_model model;
9562 rtx tmp = operands[1];
9563 rtx addend = NULL;
9564
9565 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9566 {
9567 addend = XEXP (XEXP (tmp, 0), 1);
9568 tmp = XEXP (XEXP (tmp, 0), 0);
9569 }
9570
9571 gcc_assert (SYMBOL_REF_P (tmp));
9572 model = SYMBOL_REF_TLS_MODEL (tmp);
9573 gcc_assert (model != 0);
9574
9575 tmp = rs6000_legitimize_tls_address (tmp, model);
9576 if (addend)
9577 {
9578 tmp = gen_rtx_PLUS (mode, tmp, addend);
9579 tmp = force_operand (tmp, operands[0]);
9580 }
9581 operands[1] = tmp;
9582 }
9583
9584 /* 128-bit constant floating-point values on Darwin should really be loaded
9585 as two parts. However, this premature splitting is a problem when DFmode
9586 values can go into Altivec registers. */
9587 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9588 && !reg_addr[DFmode].scalar_in_vmx_p)
9589 {
9590 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9591 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9592 DFmode);
9593 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9594 GET_MODE_SIZE (DFmode)),
9595 simplify_gen_subreg (DFmode, operands[1], mode,
9596 GET_MODE_SIZE (DFmode)),
9597 DFmode);
9598 return;
9599 }
9600
9601   /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9602      p1:SD) if p1 is not of floating point class and p0 is spilled,
9603      since there is no analogous movsd_store for this case.  */
9604 if (lra_in_progress && mode == DDmode
9605 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9606 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9607 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9608 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9609 {
9610 enum reg_class cl;
9611 int regno = REGNO (SUBREG_REG (operands[1]));
9612
9613 if (!HARD_REGISTER_NUM_P (regno))
9614 {
9615 cl = reg_preferred_class (regno);
9616 regno = reg_renumber[regno];
9617 if (regno < 0)
9618 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9619 }
9620 if (regno >= 0 && ! FP_REGNO_P (regno))
9621 {
9622 mode = SDmode;
9623 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9624 operands[1] = SUBREG_REG (operands[1]);
9625 }
9626 }
9627 if (lra_in_progress
9628 && mode == SDmode
9629 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9630 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9631 && (REG_P (operands[1])
9632 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9633 {
9634 int regno = reg_or_subregno (operands[1]);
9635 enum reg_class cl;
9636
9637 if (!HARD_REGISTER_NUM_P (regno))
9638 {
9639 cl = reg_preferred_class (regno);
9640 gcc_assert (cl != NO_REGS);
9641 regno = reg_renumber[regno];
9642 if (regno < 0)
9643 regno = ira_class_hard_regs[cl][0];
9644 }
9645 if (FP_REGNO_P (regno))
9646 {
9647 if (GET_MODE (operands[0]) != DDmode)
9648 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9649 emit_insn (gen_movsd_store (operands[0], operands[1]));
9650 }
9651 else if (INT_REGNO_P (regno))
9652 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9653 else
9654 	gcc_unreachable ();
9655 return;
9656 }
9657   /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9658      p1:DD)) if p0 is not of floating point class and p1 is spilled,
9659      since there is no analogous movsd_load for this case.  */
9660 if (lra_in_progress && mode == DDmode
9661 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9662 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9663 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9664 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9665 {
9666 enum reg_class cl;
9667 int regno = REGNO (SUBREG_REG (operands[0]));
9668
9669 if (!HARD_REGISTER_NUM_P (regno))
9670 {
9671 cl = reg_preferred_class (regno);
9672 regno = reg_renumber[regno];
9673 if (regno < 0)
9674 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9675 }
9676 if (regno >= 0 && ! FP_REGNO_P (regno))
9677 {
9678 mode = SDmode;
9679 operands[0] = SUBREG_REG (operands[0]);
9680 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9681 }
9682 }
9683 if (lra_in_progress
9684 && mode == SDmode
9685 && (REG_P (operands[0])
9686 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9687 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9688 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9689 {
9690 int regno = reg_or_subregno (operands[0]);
9691 enum reg_class cl;
9692
9693 if (!HARD_REGISTER_NUM_P (regno))
9694 {
9695 cl = reg_preferred_class (regno);
9696 gcc_assert (cl != NO_REGS);
9697 regno = reg_renumber[regno];
9698 if (regno < 0)
9699 regno = ira_class_hard_regs[cl][0];
9700 }
9701 if (FP_REGNO_P (regno))
9702 {
9703 if (GET_MODE (operands[1]) != DDmode)
9704 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9705 emit_insn (gen_movsd_load (operands[0], operands[1]));
9706 }
9707 else if (INT_REGNO_P (regno))
9708 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9709 else
9710 	gcc_unreachable ();
9711 return;
9712 }
9713
9714 /* FIXME: In the long term, this switch statement should go away
9715 and be replaced by a sequence of tests based on things like
9716 mode == Pmode. */
9717 switch (mode)
9718 {
9719 case E_HImode:
9720 case E_QImode:
9721 if (CONSTANT_P (operands[1])
9722 && !CONST_INT_P (operands[1]))
9723 operands[1] = force_const_mem (mode, operands[1]);
9724 break;
9725
9726 case E_TFmode:
9727 case E_TDmode:
9728 case E_IFmode:
9729 case E_KFmode:
9730 if (FLOAT128_2REG_P (mode))
9731 rs6000_eliminate_indexed_memrefs (operands);
9732 /* fall through */
9733
9734 case E_DFmode:
9735 case E_DDmode:
9736 case E_SFmode:
9737 case E_SDmode:
9738 if (CONSTANT_P (operands[1])
9739 && ! easy_fp_constant (operands[1], mode))
9740 operands[1] = force_const_mem (mode, operands[1]);
9741 break;
9742
9743 case E_V16QImode:
9744 case E_V8HImode:
9745 case E_V4SFmode:
9746 case E_V4SImode:
9747 case E_V2DFmode:
9748 case E_V2DImode:
9749 case E_V1TImode:
9750 if (CONSTANT_P (operands[1])
9751 && !easy_vector_constant (operands[1], mode))
9752 operands[1] = force_const_mem (mode, operands[1]);
9753 break;
9754
9755 case E_SImode:
9756 case E_DImode:
9757       /* Use the default pattern for the address of ELF small data.  */
9758 if (TARGET_ELF
9759 && mode == Pmode
9760 && DEFAULT_ABI == ABI_V4
9761 && (SYMBOL_REF_P (operands[1])
9762 || GET_CODE (operands[1]) == CONST)
9763 && small_data_operand (operands[1], mode))
9764 {
9765 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9766 return;
9767 }
9768
9769 /* Use the default pattern for loading up PC-relative addresses. */
9770 if (TARGET_PCREL && mode == Pmode
9771 && pcrel_local_or_external_address (operands[1], Pmode))
9772 {
9773 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9774 return;
9775 }
9776
9777 if (DEFAULT_ABI == ABI_V4
9778 && mode == Pmode && mode == SImode
9779 && flag_pic == 1 && got_operand (operands[1], mode))
9780 {
9781 emit_insn (gen_movsi_got (operands[0], operands[1]));
9782 return;
9783 }
9784
9785 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9786 && TARGET_NO_TOC_OR_PCREL
9787 && ! flag_pic
9788 && mode == Pmode
9789 && CONSTANT_P (operands[1])
9790 && GET_CODE (operands[1]) != HIGH
9791 && !CONST_INT_P (operands[1]))
9792 {
9793 rtx target = (!can_create_pseudo_p ()
9794 ? operands[0]
9795 : gen_reg_rtx (mode));
9796
9797 /* If this is a function address on -mcall-aixdesc,
9798 convert it to the address of the descriptor. */
9799 if (DEFAULT_ABI == ABI_AIX
9800 && SYMBOL_REF_P (operands[1])
9801 && XSTR (operands[1], 0)[0] == '.')
9802 {
9803 const char *name = XSTR (operands[1], 0);
9804 rtx new_ref;
9805 while (*name == '.')
9806 name++;
9807 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9808 CONSTANT_POOL_ADDRESS_P (new_ref)
9809 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9810 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9811 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9812 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9813 operands[1] = new_ref;
9814 }
9815
9816 if (DEFAULT_ABI == ABI_DARWIN)
9817 {
9818 #if TARGET_MACHO
9819 /* This is not PIC code, but could require the subset of
9820 indirections used by mdynamic-no-pic. */
9821 if (MACHO_DYNAMIC_NO_PIC_P)
9822 {
9823 /* Take care of any required data indirection. */
9824 operands[1] = rs6000_machopic_legitimize_pic_address (
9825 operands[1], mode, operands[0]);
9826 if (operands[0] != operands[1])
9827 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9828 return;
9829 }
9830 #endif
9831 emit_insn (gen_macho_high (Pmode, target, operands[1]));
9832 emit_insn (gen_macho_low (Pmode, operands[0],
9833 target, operands[1]));
9834 return;
9835 }
9836
9837 emit_insn (gen_elf_high (target, operands[1]));
9838 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9839 return;
9840 }
9841
9842 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9843 and we have put it in the TOC, we just need to make a TOC-relative
9844 reference to it. */
9845 if (TARGET_TOC
9846 && SYMBOL_REF_P (operands[1])
9847 && use_toc_relative_ref (operands[1], mode))
9848 operands[1] = create_TOC_reference (operands[1], operands[0]);
9849 else if (mode == Pmode
9850 && CONSTANT_P (operands[1])
9851 && GET_CODE (operands[1]) != HIGH
9852 && ((REG_P (operands[0])
9853 && FP_REGNO_P (REGNO (operands[0])))
9854 || !CONST_INT_P (operands[1])
9855 || (num_insns_constant (operands[1], mode)
9856 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9857 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9858 && (TARGET_CMODEL == CMODEL_SMALL
9859 || can_create_pseudo_p ()
9860 || (REG_P (operands[0])
9861 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9862 {
9863
9864 #if TARGET_MACHO
9865 /* Darwin uses a special PIC legitimizer. */
9866 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9867 {
9868 operands[1] =
9869 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9870 operands[0]);
9871 if (operands[0] != operands[1])
9872 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9873 return;
9874 }
9875 #endif
9876
9877 /* If we are to limit the number of things we put in the TOC and
9878 this is a symbol plus a constant we can add in one insn,
9879 just put the symbol in the TOC and add the constant. */
9880 if (GET_CODE (operands[1]) == CONST
9881 && TARGET_NO_SUM_IN_TOC
9882 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9883 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9884 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9885 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9886 && ! side_effects_p (operands[0]))
9887 {
9888 rtx sym =
9889 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9890 rtx other = XEXP (XEXP (operands[1], 0), 1);
9891
9892 sym = force_reg (mode, sym);
9893 emit_insn (gen_add3_insn (operands[0], sym, other));
9894 return;
9895 }
9896
9897 operands[1] = force_const_mem (mode, operands[1]);
9898
9899 if (TARGET_TOC
9900 && SYMBOL_REF_P (XEXP (operands[1], 0))
9901 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9902 {
9903 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9904 operands[0]);
9905 operands[1] = gen_const_mem (mode, tocref);
9906 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9907 }
9908 }
9909 break;
9910
9911 case E_TImode:
9912 if (!VECTOR_MEM_VSX_P (TImode))
9913 rs6000_eliminate_indexed_memrefs (operands);
9914 break;
9915
9916 case E_PTImode:
9917 rs6000_eliminate_indexed_memrefs (operands);
9918 break;
9919
9920 default:
9921 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9922 }
9923
9924 /* Above, we may have called force_const_mem which may have returned
9925 an invalid address. If we can, fix this up; otherwise, reload will
9926 have to deal with it. */
9927 if (MEM_P (operands[1]))
9928 operands[1] = validize_mem (operands[1]);
9929
9930 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9931 }
9932 \f
9933
9934 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
9935 static void
9936 init_float128_ibm (machine_mode mode)
9937 {
9938 if (!TARGET_XL_COMPAT)
9939 {
9940 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
9941 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
9942 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
9943 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
9944
9945 if (!TARGET_HARD_FLOAT)
9946 {
9947 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
9948 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
9949 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
9950 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
9951 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
9952 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
9953 set_optab_libfunc (le_optab, mode, "__gcc_qle");
9954 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
9955
9956 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
9957 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
9958 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
9959 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
9960 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
9961 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
9962 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
9963 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
9964 }
9965 }
9966 else
9967 {
9968 set_optab_libfunc (add_optab, mode, "_xlqadd");
9969 set_optab_libfunc (sub_optab, mode, "_xlqsub");
9970 set_optab_libfunc (smul_optab, mode, "_xlqmul");
9971 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
9972 }
9973
9974 /* Add various conversions for IFmode to use the traditional TFmode
9975 names. */
9976 if (mode == IFmode)
9977 {
9978 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
9979 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
9980 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
9981 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
9982 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
9983 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
9984
9985 if (TARGET_POWERPC64)
9986 {
9987 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
9988 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
9989 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
9990 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
9991 }
9992 }
9993 }
9994
9995 /* Create a decl for either complex long double multiply or complex long double
9996 divide when long double is IEEE 128-bit floating point. We can't use
9997    __multc3 and __divtc3 because the original long double (using IBM extended
9998    double) used those names.  The complex multiply/divide functions are encoded
9999 as builtin functions with a complex result and 4 scalar inputs. */
10000
10001 static void
10002 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10003 {
10004 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10005 name, NULL_TREE);
10006
10007 set_builtin_decl (fncode, fndecl, true);
10008
10009 if (TARGET_DEBUG_BUILTIN)
10010 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10011
10012 return;
10013 }
10014
10015 /* Set up IEEE 128-bit floating point routines. Use different names if the
10016 arguments can be passed in a vector register. The historical PowerPC
10017 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10018 continue to use that if we aren't using vector registers to pass IEEE
10019 128-bit floating point. */
10020
10021 static void
10022 init_float128_ieee (machine_mode mode)
10023 {
10024 if (FLOAT128_VECTOR_P (mode))
10025 {
10026 static bool complex_muldiv_init_p = false;
10027
10028 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10029 we have clone or target attributes, this will be called a second
10030 time. We want to create the built-in function only once. */
10031 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10032 {
10033 complex_muldiv_init_p = true;
10034 built_in_function fncode_mul =
10035 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10036 - MIN_MODE_COMPLEX_FLOAT);
10037 built_in_function fncode_div =
10038 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10039 - MIN_MODE_COMPLEX_FLOAT);
10040
10041 tree fntype = build_function_type_list (complex_long_double_type_node,
10042 long_double_type_node,
10043 long_double_type_node,
10044 long_double_type_node,
10045 long_double_type_node,
10046 NULL_TREE);
10047
10048 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10049 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10050 }
10051
10052 set_optab_libfunc (add_optab, mode, "__addkf3");
10053 set_optab_libfunc (sub_optab, mode, "__subkf3");
10054 set_optab_libfunc (neg_optab, mode, "__negkf2");
10055 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10056 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10057 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10058 set_optab_libfunc (abs_optab, mode, "__abskf2");
10059 set_optab_libfunc (powi_optab, mode, "__powikf2");
10060
10061 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10062 set_optab_libfunc (ne_optab, mode, "__nekf2");
10063 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10064 set_optab_libfunc (ge_optab, mode, "__gekf2");
10065 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10066 set_optab_libfunc (le_optab, mode, "__lekf2");
10067 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10068
10069 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10070 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10071 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10072 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10073
10074 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10075 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10076 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10077
10078 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10079 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10080 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10081
10082 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10083 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10084 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10085 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10086 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10087 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10088
10089 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10090 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10091 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10092 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10093
10094 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10095 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10096 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10097 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10098
10099 if (TARGET_POWERPC64)
10100 {
10101 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10102 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10103 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10104 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10105 }
10106 }
10107
10108 else
10109 {
10110 set_optab_libfunc (add_optab, mode, "_q_add");
10111 set_optab_libfunc (sub_optab, mode, "_q_sub");
10112 set_optab_libfunc (neg_optab, mode, "_q_neg");
10113 set_optab_libfunc (smul_optab, mode, "_q_mul");
10114 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10115 if (TARGET_PPC_GPOPT)
10116 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10117
10118 set_optab_libfunc (eq_optab, mode, "_q_feq");
10119 set_optab_libfunc (ne_optab, mode, "_q_fne");
10120 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10121 set_optab_libfunc (ge_optab, mode, "_q_fge");
10122 set_optab_libfunc (lt_optab, mode, "_q_flt");
10123 set_optab_libfunc (le_optab, mode, "_q_fle");
10124
10125 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10126 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10127 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10128 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10129 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10130 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10131 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10132 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10133 }
10134 }
10135
10136 static void
10137 rs6000_init_libfuncs (void)
10138 {
10139 /* __float128 support. */
10140 if (TARGET_FLOAT128_TYPE)
10141 {
10142 init_float128_ibm (IFmode);
10143 init_float128_ieee (KFmode);
10144 }
10145
10146 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10147 if (TARGET_LONG_DOUBLE_128)
10148 {
10149 if (!TARGET_IEEEQUAD)
10150 init_float128_ibm (TFmode);
10151
10152 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10153 else
10154 init_float128_ieee (TFmode);
10155 }
10156 }
10157
10158 /* Emit a potentially record-form instruction, setting DST from SRC.
10159 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10160 signed comparison of DST with zero. If DOT is 1, the generated RTL
10161 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10162 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10163 a separate COMPARE. */
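/* Illustrative example (assuming standard mnemonics): with DOT == 1 and
   CCREG being CR0, an AND source can become a single record-form
   "and. rD,rA,rB", which sets CR0 from the signed comparison of the
   result with zero while the rD result itself is don't-care.  */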
10164
10165 void
10166 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10167 {
10168 if (dot == 0)
10169 {
10170 emit_move_insn (dst, src);
10171 return;
10172 }
10173
10174 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10175 {
10176 emit_move_insn (dst, src);
10177 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10178 return;
10179 }
10180
10181 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10182 if (dot == 1)
10183 {
10184 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10185 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10186 }
10187 else
10188 {
10189 rtx set = gen_rtx_SET (dst, src);
10190 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
10191 }
10192 }
10193
10194 \f
10195 /* A validation routine: say whether CODE, a condition code, and MODE
10196 match. The other alternatives either don't make sense or should
10197 never be generated. */
10198
10199 void
10200 validate_condition_mode (enum rtx_code code, machine_mode mode)
10201 {
10202 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10203 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10204 && GET_MODE_CLASS (mode) == MODE_CC);
10205
10206 /* These don't make sense. */
10207 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10208 || mode != CCUNSmode);
10209
10210 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10211 || mode == CCUNSmode);
10212
10213 gcc_assert (mode == CCFPmode
10214 || (code != ORDERED && code != UNORDERED
10215 && code != UNEQ && code != LTGT
10216 && code != UNGT && code != UNLT
10217 && code != UNGE && code != UNLE));
10218
10219 /* These are invalid; the information is not there. */
10220 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10221 }
10222
10223 \f
10224 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10225 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10226 not zero, store there the bit offset (counted from the right) where
10227 the single stretch of 1 bits begins; and similarly for B, the bit
10228 offset where it ends. */
10229
10230 bool
10231 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10232 {
10233 unsigned HOST_WIDE_INT val = INTVAL (mask);
10234 unsigned HOST_WIDE_INT bit;
10235 int nb, ne;
10236 int n = GET_MODE_PRECISION (mode);
10237
10238 if (mode != DImode && mode != SImode)
10239 return false;
10240
10241 if (INTVAL (mask) >= 0)
10242 {
10243 bit = val & -val;
10244 ne = exact_log2 (bit);
10245 nb = exact_log2 (val + bit);
10246 }
10247 else if (val + 1 == 0)
10248 {
10249 nb = n;
10250 ne = 0;
10251 }
10252 else if (val & 1)
10253 {
10254 val = ~val;
10255 bit = val & -val;
10256 nb = exact_log2 (bit);
10257 ne = exact_log2 (val + bit);
10258 }
10259 else
10260 {
10261 bit = val & -val;
10262 ne = exact_log2 (bit);
10263 if (val + bit == 0)
10264 nb = n;
10265 else
10266 nb = 0;
10267 }
10268
10269 nb--;
10270
10271 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10272 return false;
10273
10274 if (b)
10275 *b = nb;
10276 if (e)
10277 *e = ne;
10278
10279 return true;
10280 }
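/* Worked example (illustrative): for MASK = 0x0fffff00 in DImode, the
   lowest set bit gives bit = 0x100 and ne = 8, and val + bit = 1 << 28
   gives nb = 28, decremented to 27; so the mask is the single run of
   1 bits from bit 8 through bit 27, and we return true with *b = 27,
   *e = 8.  */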
10281
10282 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10283 or rldicr instruction, to implement an AND with it in mode MODE. */
10284
10285 bool
10286 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10287 {
10288 int nb, ne;
10289
10290 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10291 return false;
10292
10293 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10294 does not wrap. */
10295 if (mode == DImode)
10296 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10297
10298 /* For SImode, rlwinm can do everything. */
10299 if (mode == SImode)
10300 return (nb < 32 && ne < 32);
10301
10302 return false;
10303 }
10304
10305 /* Return the instruction template for an AND with mask in mode MODE, with
10306 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10307
10308 const char *
10309 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10310 {
10311 int nb, ne;
10312
10313 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10314 gcc_unreachable ();
10315
10316 if (mode == DImode && ne == 0)
10317 {
10318 operands[3] = GEN_INT (63 - nb);
10319 if (dot)
10320 return "rldicl. %0,%1,0,%3";
10321 return "rldicl %0,%1,0,%3";
10322 }
10323
10324 if (mode == DImode && nb == 63)
10325 {
10326 operands[3] = GEN_INT (63 - ne);
10327 if (dot)
10328 return "rldicr. %0,%1,0,%3";
10329 return "rldicr %0,%1,0,%3";
10330 }
10331
10332 if (nb < 32 && ne < 32)
10333 {
10334 operands[3] = GEN_INT (31 - nb);
10335 operands[4] = GEN_INT (31 - ne);
10336 if (dot)
10337 return "rlwinm. %0,%1,0,%3,%4";
10338 return "rlwinm %0,%1,0,%3,%4";
10339 }
10340
10341 gcc_unreachable ();
10342 }
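/* Illustrative example: an AND with 0xffffffffffffff00 in DImode has
   nb = 63 and ne = 8, so the second case above yields
   "rldicr %0,%1,0,55", which clears the low eight bits.  */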
10343
10344 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10345 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10346 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10347
10348 bool
10349 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10350 {
10351 int nb, ne;
10352
10353 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10354 return false;
10355
10356 int n = GET_MODE_PRECISION (mode);
10357 int sh = -1;
10358
10359 if (CONST_INT_P (XEXP (shift, 1)))
10360 {
10361 sh = INTVAL (XEXP (shift, 1));
10362 if (sh < 0 || sh >= n)
10363 return false;
10364 }
10365
10366 rtx_code code = GET_CODE (shift);
10367
10368   /* Convert any shift by 0 to a rotate, to simplify the code below.  */
10369 if (sh == 0)
10370 code = ROTATE;
10371
10372 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10373 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10374 code = ASHIFT;
10375 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10376 {
10377 code = LSHIFTRT;
10378 sh = n - sh;
10379 }
10380
10381 /* DImode rotates need rld*. */
10382 if (mode == DImode && code == ROTATE)
10383 return (nb == 63 || ne == 0 || ne == sh);
10384
10385 /* SImode rotates need rlw*. */
10386 if (mode == SImode && code == ROTATE)
10387 return (nb < 32 && ne < 32 && sh < 32);
10388
10389 /* Wrap-around masks are only okay for rotates. */
10390 if (ne > nb)
10391 return false;
10392
10393 /* Variable shifts are only okay for rotates. */
10394 if (sh < 0)
10395 return false;
10396
10397 /* Don't allow ASHIFT if the mask is wrong for that. */
10398 if (code == ASHIFT && ne < sh)
10399 return false;
10400
10401 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10402 if the mask is wrong for that. */
10403 if (nb < 32 && ne < 32 && sh < 32
10404 && !(code == LSHIFTRT && nb >= 32 - sh))
10405 return true;
10406
10407 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10408 if the mask is wrong for that. */
10409 if (code == LSHIFTRT)
10410 sh = 64 - sh;
10411 if (nb == 63 || ne == 0 || ne == sh)
10412 return !(code == LSHIFTRT && nb >= sh);
10413
10414 return false;
10415 }
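
/* Illustrative SImode examples for the rules above: (x << 4) & 0xff0
   (nb = 11, ne = 4 = sh) and (x << 4) & 0xff00 (ne = 8 >= sh) both pass,
   while (x << 8) & 0xff0 is rejected by the ASHIFT test, since
   ne = 4 < sh = 8. */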
10416
10417 /* Return the instruction template for a shift with mask in mode MODE, with
10418 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10419
10420 const char *
10421 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10422 {
10423 int nb, ne;
10424
10425 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10426 gcc_unreachable ();
10427
10428 if (mode == DImode && ne == 0)
10429 {
10430 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10431 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
10432 operands[3] = GEN_INT (63 - nb);
10433 if (dot)
10434 return "rld%I2cl. %0,%1,%2,%3";
10435 return "rld%I2cl %0,%1,%2,%3";
10436 }
10437
10438 if (mode == DImode && nb == 63)
10439 {
10440 operands[3] = GEN_INT (63 - ne);
10441 if (dot)
10442 return "rld%I2cr. %0,%1,%2,%3";
10443 return "rld%I2cr %0,%1,%2,%3";
10444 }
10445
10446 if (mode == DImode
10447 && GET_CODE (operands[4]) != LSHIFTRT
10448 && CONST_INT_P (operands[2])
10449 && ne == INTVAL (operands[2]))
10450 {
10451 operands[3] = GEN_INT (63 - nb);
10452 if (dot)
10453 return "rld%I2c. %0,%1,%2,%3";
10454 return "rld%I2c %0,%1,%2,%3";
10455 }
10456
10457 if (nb < 32 && ne < 32)
10458 {
10459 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10460 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10461 operands[3] = GEN_INT (31 - nb);
10462 operands[4] = GEN_INT (31 - ne);
10463 /* This insn can also be a 64-bit rotate with mask that really makes
10464 it just a shift right (with mask); the %h below are to adjust for
10465 that situation (shift count is >= 32 in that case). */
10466 if (dot)
10467 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10468 return "rlw%I2nm %0,%1,%h2,%3,%4";
10469 }
10470
10471 gcc_unreachable ();
10472 }
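
/* E.g. (illustrative): (x >> 8) & 0x00ffffffffffffff in DImode takes the
   first arm above, the shift count is rewritten to 64 - 8 = 56, and the
   template yields "rldicl %0,%1,56,8", the canonical encoding of
   srdi %0,%1,8. */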
10473
10474 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10475 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10476 ASHIFT, or LSHIFTRT) in mode MODE. */
10477
10478 bool
10479 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
10480 {
10481 int nb, ne;
10482
10483 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10484 return false;
10485
10486 int n = GET_MODE_PRECISION (mode);
10487
10488 int sh = INTVAL (XEXP (shift, 1));
10489 if (sh < 0 || sh >= n)
10490 return false;
10491
10492 rtx_code code = GET_CODE (shift);
10493
10494 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10495 if (sh == 0)
10496 code = ROTATE;
10497
10498 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10499 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10500 code = ASHIFT;
10501 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10502 {
10503 code = LSHIFTRT;
10504 sh = n - sh;
10505 }
10506
10507 /* DImode rotates need rldimi. */
10508 if (mode == DImode && code == ROTATE)
10509 return (ne == sh);
10510
10511 /* SImode rotates need rlwimi. */
10512 if (mode == SImode && code == ROTATE)
10513 return (nb < 32 && ne < 32 && sh < 32);
10514
10515 /* Wrap-around masks are only okay for rotates. */
10516 if (ne > nb)
10517 return false;
10518
10519 /* Don't allow ASHIFT if the mask is wrong for that. */
10520 if (code == ASHIFT && ne < sh)
10521 return false;
10522
10523 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10524 if the mask is wrong for that. */
10525 if (nb < 32 && ne < 32 && sh < 32
10526 && !(code == LSHIFTRT && nb >= 32 - sh))
10527 return true;
10528
10529 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10530 if the mask is wrong for that. */
10531 if (code == LSHIFTRT)
10532 sh = 64 - sh;
10533 if (ne == sh)
10534 return !(code == LSHIFTRT && nb >= sh);
10535
10536 return false;
10537 }
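
/* E.g. (illustrative): inserting (y << 8) under mask 0xff00 in SImode has
   nb = 15, ne = 8 = sh, so the function below emits a single insert
   instruction (rldimi on 64-bit targets, else "rlwimi %0,%1,8,16,23"). */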
10538
10539 /* Return the instruction template for an insert with mask in mode MODE, with
10540 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10541
10542 const char *
10543 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10544 {
10545 int nb, ne;
10546
10547 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10548 gcc_unreachable ();
10549
10550 /* Prefer rldimi because rlwimi is cracked. */
10551 if (TARGET_POWERPC64
10552 && (!dot || mode == DImode)
10553 && GET_CODE (operands[4]) != LSHIFTRT
10554 && ne == INTVAL (operands[2]))
10555 {
10556 operands[3] = GEN_INT (63 - nb);
10557 if (dot)
10558 return "rldimi. %0,%1,%2,%3";
10559 return "rldimi %0,%1,%2,%3";
10560 }
10561
10562 if (nb < 32 && ne < 32)
10563 {
10564 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10565 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10566 operands[3] = GEN_INT (31 - nb);
10567 operands[4] = GEN_INT (31 - ne);
10568 if (dot)
10569 return "rlwimi. %0,%1,%2,%3,%4";
10570 return "rlwimi %0,%1,%2,%3,%4";
10571 }
10572
10573 gcc_unreachable ();
10574 }
10575
10576 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10577 using two machine instructions. */
10578
10579 bool
10580 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10581 {
10582 /* There are two kinds of AND we can handle with two insns:
10583 1) those we can do with two rl* insn;
10584 2) ori[s];xori[s].
10585
10586 We do not handle that last case yet. */
10587
10588 /* If there is just one stretch of ones, we can do it. */
10589 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10590 return true;
10591
10592 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10593 one insn, we can do the whole thing with two. */
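  /* A worked example (illustrative): for val = 0x73 = 0b1110011, bit1 is
     0b0000001 (the lowest set bit), bit2 is 0b0000100 (the bottom of the
     lowest hole), val1 is 0b1110000 (val with its lowest run of ones
     cleared), bit3 is 0b0010000 (the bit just above the hole), and
     val + bit3 - bit2 = 0x7f is the mask with the hole filled in. */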
10594 unsigned HOST_WIDE_INT val = INTVAL (c);
10595 unsigned HOST_WIDE_INT bit1 = val & -val;
10596 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10597 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10598 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10599 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
10600 }
10601
10602 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10603 If EXPAND is true, split rotate-and-mask instructions we generate to
10604 their constituent parts as well (this is used during expand); if DOT
10605 is 1, make the last insn a record-form instruction clobbering the
10606 destination GPR and setting the CC reg (from operands[3]); if 2, set
10607 that GPR as well as the CC reg. */
10608
10609 void
10610 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10611 {
10612 gcc_assert (!(expand && dot));
10613
10614 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10615
10616 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10617 shift right. This generates better code than doing the masks without
10618 shifts, or shifting first right and then left. */
10619 int nb, ne;
10620 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10621 {
10622 gcc_assert (mode == DImode);
10623
10624 int shift = 63 - nb;
10625 if (expand)
10626 {
10627 rtx tmp1 = gen_reg_rtx (DImode);
10628 rtx tmp2 = gen_reg_rtx (DImode);
10629 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10630 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10631 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10632 }
10633 else
10634 {
10635 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10636 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10637 emit_move_insn (operands[0], tmp);
10638 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10639 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10640 }
10641 return;
10642 }
10643
10644 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10645 that does the rest. */
10646 unsigned HOST_WIDE_INT bit1 = val & -val;
10647 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10648 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10649 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10650
10651 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10652 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
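  /* Illustration: continuing the 0x73 example, mask2 = 0x7f fills the hole
     and mask1 = ~0xc clears it again, so val == (mask1 & mask2). */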
10653
10654 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10655
10656 /* Two "no-rotate"-and-mask instructions, for SImode. */
10657 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10658 {
10659 gcc_assert (mode == SImode);
10660
10661 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10662 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10663 emit_move_insn (reg, tmp);
10664 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10665 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10666 return;
10667 }
10668
10669 gcc_assert (mode == DImode);
10670
10671 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10672 insns; we have to do the first in SImode, because it wraps. */
10673 if (mask2 <= 0xffffffff
10674 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10675 {
10676 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10677 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10678 GEN_INT (mask1));
10679 rtx reg_low = gen_lowpart (SImode, reg);
10680 emit_move_insn (reg_low, tmp);
10681 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10682 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10683 return;
10684 }
10685
10686 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10687 at the top end), rotate back and clear the other hole. */
10688 int right = exact_log2 (bit3);
10689 int left = 64 - right;
10690
10691 /* Rotate the mask too. */
10692 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10693
10694 if (expand)
10695 {
10696 rtx tmp1 = gen_reg_rtx (DImode);
10697 rtx tmp2 = gen_reg_rtx (DImode);
10698 rtx tmp3 = gen_reg_rtx (DImode);
10699 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10700 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10701 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10702 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10703 }
10704 else
10705 {
10706 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10707 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10708 emit_move_insn (operands[0], tmp);
10709 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10710 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10711 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10712 }
10713 }
10714 \f
10715 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
10716 for lfq and stfq insns iff the registers are hard registers. */
10717
10718 int
10719 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10720 {
10721 /* We might have been passed a SUBREG. */
10722 if (!REG_P (reg1) || !REG_P (reg2))
10723 return 0;
10724
10725 /* We might have been passed non-floating-point registers. */
10726 if (!FP_REGNO_P (REGNO (reg1))
10727 || !FP_REGNO_P (REGNO (reg2)))
10728 return 0;
10729
10730 return (REGNO (reg1) == REGNO (reg2) - 1);
10731 }
10732
10733 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
10734 addr1 and addr2 must be in consecutive memory locations
10735 (addr2 == addr1 + 8). */
10736
10737 int
10738 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10739 {
10740 rtx addr1, addr2;
10741 unsigned int reg1, reg2;
10742 int offset1, offset2;
10743
10744 /* The mems cannot be volatile. */
10745 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10746 return 0;
10747
10748 addr1 = XEXP (mem1, 0);
10749 addr2 = XEXP (mem2, 0);
10750
10751 /* Extract an offset (if used) from the first addr. */
10752 if (GET_CODE (addr1) == PLUS)
10753 {
10754 /* If not a REG, return zero. */
10755 if (!REG_P (XEXP (addr1, 0)))
10756 return 0;
10757 else
10758 {
10759 reg1 = REGNO (XEXP (addr1, 0));
10760 /* The offset must be constant! */
10761 if (!CONST_INT_P (XEXP (addr1, 1)))
10762 return 0;
10763 offset1 = INTVAL (XEXP (addr1, 1));
10764 }
10765 }
10766 else if (!REG_P (addr1))
10767 return 0;
10768 else
10769 {
10770 reg1 = REGNO (addr1);
10771 /* This was a simple (mem (reg)) expression. Offset is 0. */
10772 offset1 = 0;
10773 }
10774
10775 /* And now for the second addr. */
10776 if (GET_CODE (addr2) == PLUS)
10777 {
10778 /* If not a REG, return zero. */
10779 if (!REG_P (XEXP (addr2, 0)))
10780 return 0;
10781 else
10782 {
10783 reg2 = REGNO (XEXP (addr2, 0));
10784 /* The offset must be constant. */
10785 if (!CONST_INT_P (XEXP (addr2, 1)))
10786 return 0;
10787 offset2 = INTVAL (XEXP (addr2, 1));
10788 }
10789 }
10790 else if (!REG_P (addr2))
10791 return 0;
10792 else
10793 {
10794 reg2 = REGNO (addr2);
10795 /* This was a simple (mem (reg)) expression. Offset is 0. */
10796 offset2 = 0;
10797 }
10798
10799 /* Both of these must have the same base register. */
10800 if (reg1 != reg2)
10801 return 0;
10802
10803 /* The offset for the second addr must be 8 more than the first addr. */
10804 if (offset2 != offset1 + 8)
10805 return 0;
10806
10807 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10808 instructions. */
10809 return 1;
10810 }
10811 \f
10812 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
10813 need to use DDmode, in all other cases we can use the same mode. */
10814 static machine_mode
10815 rs6000_secondary_memory_needed_mode (machine_mode mode)
10816 {
10817 if (lra_in_progress && mode == SDmode)
10818 return DDmode;
10819 return mode;
10820 }
10821
10822 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10823 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10824 only work on the traditional altivec registers, note if an altivec register
10825 was chosen. */
10826
10827 static enum rs6000_reg_type
10828 register_to_reg_type (rtx reg, bool *is_altivec)
10829 {
10830 HOST_WIDE_INT regno;
10831 enum reg_class rclass;
10832
10833 if (SUBREG_P (reg))
10834 reg = SUBREG_REG (reg);
10835
10836 if (!REG_P (reg))
10837 return NO_REG_TYPE;
10838
10839 regno = REGNO (reg);
10840 if (!HARD_REGISTER_NUM_P (regno))
10841 {
10842 if (!lra_in_progress && !reload_completed)
10843 return PSEUDO_REG_TYPE;
10844
10845 regno = true_regnum (reg);
10846 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
10847 return PSEUDO_REG_TYPE;
10848 }
10849
10850 gcc_assert (regno >= 0);
10851
10852 if (is_altivec && ALTIVEC_REGNO_P (regno))
10853 *is_altivec = true;
10854
10855 rclass = rs6000_regno_regclass[regno];
10856 return reg_class_to_reg_type[(int)rclass];
10857 }
10858
10859 /* Helper function to return the cost of adding a TOC entry address. */
10860
10861 static inline int
10862 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10863 {
10864 int ret;
10865
10866 if (TARGET_CMODEL != CMODEL_SMALL)
10867 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10868
10869 else
10870 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10871
10872 return ret;
10873 }
10874
10875 /* Helper function for rs6000_secondary_reload to determine whether the memory
10876 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10877 needs reloading. Return negative if the memory is not handled by the memory
10878 helper functions, so that a different reload method can be tried; 0 if no
10879 additional instructions are needed; and positive to give the extra cost for
10880 the memory. */
10881
10882 static int
10883 rs6000_secondary_reload_memory (rtx addr,
10884 enum reg_class rclass,
10885 machine_mode mode)
10886 {
10887 int extra_cost = 0;
10888 rtx reg, and_arg, plus_arg0, plus_arg1;
10889 addr_mask_type addr_mask;
10890 const char *type = NULL;
10891 const char *fail_msg = NULL;
10892
10893 if (GPR_REG_CLASS_P (rclass))
10894 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10895
10896 else if (rclass == FLOAT_REGS)
10897 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10898
10899 else if (rclass == ALTIVEC_REGS)
10900 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
10901
10902 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10903 else if (rclass == VSX_REGS)
10904 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
10905 & ~RELOAD_REG_AND_M16);
10906
10907 /* If the register allocator hasn't made up its mind yet on the register
10908 class to use, settle on defaults. */
10909 else if (rclass == NO_REGS)
10910 {
10911 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
10912 & ~RELOAD_REG_AND_M16);
10913
10914 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
10915 addr_mask &= ~(RELOAD_REG_INDEXED
10916 | RELOAD_REG_PRE_INCDEC
10917 | RELOAD_REG_PRE_MODIFY);
10918 }
10919
10920 else
10921 addr_mask = 0;
10922
10923 /* If the register isn't valid in this register class, just return now. */
10924 if ((addr_mask & RELOAD_REG_VALID) == 0)
10925 {
10926 if (TARGET_DEBUG_ADDR)
10927 {
10928 fprintf (stderr,
10929 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
10930 "not valid in class\n",
10931 GET_MODE_NAME (mode), reg_class_names[rclass]);
10932 debug_rtx (addr);
10933 }
10934
10935 return -1;
10936 }
10937
10938 switch (GET_CODE (addr))
10939 {
10940 /* Does the register class support auto update forms for this mode? We
10941 don't need a scratch register, since the powerpc only supports
10942 PRE_INC, PRE_DEC, and PRE_MODIFY. */
10943 case PRE_INC:
10944 case PRE_DEC:
10945 reg = XEXP (addr, 0);
10946 if (!base_reg_operand (reg, GET_MODE (reg)))
10947 {
10948 fail_msg = "no base register #1";
10949 extra_cost = -1;
10950 }
10951
10952 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
10953 {
10954 extra_cost = 1;
10955 type = "update";
10956 }
10957 break;
10958
10959 case PRE_MODIFY:
10960 reg = XEXP (addr, 0);
10961 plus_arg1 = XEXP (addr, 1);
10962 if (!base_reg_operand (reg, GET_MODE (reg))
10963 || GET_CODE (plus_arg1) != PLUS
10964 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
10965 {
10966 fail_msg = "bad PRE_MODIFY";
10967 extra_cost = -1;
10968 }
10969
10970 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
10971 {
10972 extra_cost = 1;
10973 type = "update";
10974 }
10975 break;
10976
10977 /* Do we need to simulate AND -16 to clear the bottom address bits used
10978 in VMX load/stores? Only allow the AND for vector sizes. */
10979 case AND:
10980 and_arg = XEXP (addr, 0);
10981 if (GET_MODE_SIZE (mode) != 16
10982 || !CONST_INT_P (XEXP (addr, 1))
10983 || INTVAL (XEXP (addr, 1)) != -16)
10984 {
10985 fail_msg = "bad Altivec AND #1";
10986 extra_cost = -1;
10987 }
10988
10989 if (rclass != ALTIVEC_REGS)
10990 {
10991 if (legitimate_indirect_address_p (and_arg, false))
10992 extra_cost = 1;
10993
10994 else if (legitimate_indexed_address_p (and_arg, false))
10995 extra_cost = 2;
10996
10997 else
10998 {
10999 fail_msg = "bad Altivec AND #2";
11000 extra_cost = -1;
11001 }
11002
11003 type = "and";
11004 }
11005 break;
11006
11007 /* If this is an indirect address, make sure it is a base register. */
11008 case REG:
11009 case SUBREG:
11010 if (!legitimate_indirect_address_p (addr, false))
11011 {
11012 extra_cost = 1;
11013 type = "move";
11014 }
11015 break;
11016
11017 /* If this is an indexed address, make sure the register class can handle
11018 indexed addresses for this mode. */
11019 case PLUS:
11020 plus_arg0 = XEXP (addr, 0);
11021 plus_arg1 = XEXP (addr, 1);
11022
11023 /* (plus (plus (reg) (constant)) (constant)) is generated during
11024 push_reload processing, so handle it now. */
11025 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11026 {
11027 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11028 {
11029 extra_cost = 1;
11030 type = "offset";
11031 }
11032 }
11033
11034 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11035 push_reload processing, so handle it now. */
11036 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11037 {
11038 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11039 {
11040 extra_cost = 1;
11041 type = "indexed #2";
11042 }
11043 }
11044
11045 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11046 {
11047 fail_msg = "no base register #2";
11048 extra_cost = -1;
11049 }
11050
11051 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11052 {
11053 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11054 || !legitimate_indexed_address_p (addr, false))
11055 {
11056 extra_cost = 1;
11057 type = "indexed";
11058 }
11059 }
11060
11061 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11062 && CONST_INT_P (plus_arg1))
11063 {
11064 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11065 {
11066 extra_cost = 1;
11067 type = "vector d-form offset";
11068 }
11069 }
11070
11071 /* Make sure the register class can handle offset addresses. */
11072 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11073 {
11074 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11075 {
11076 extra_cost = 1;
11077 type = "offset #2";
11078 }
11079 }
11080
11081 else
11082 {
11083 fail_msg = "bad PLUS";
11084 extra_cost = -1;
11085 }
11086
11087 break;
11088
11089 case LO_SUM:
11090 /* Quad offsets are restricted and can't handle normal addresses. */
11091 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11092 {
11093 extra_cost = -1;
11094 type = "vector d-form lo_sum";
11095 }
11096
11097 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11098 {
11099 fail_msg = "bad LO_SUM";
11100 extra_cost = -1;
11101 }
11102
11103 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11104 {
11105 extra_cost = 1;
11106 type = "lo_sum";
11107 }
11108 break;
11109
11110 /* Static addresses need to create a TOC entry. */
11111 case CONST:
11112 case SYMBOL_REF:
11113 case LABEL_REF:
11114 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11115 {
11116 extra_cost = -1;
11117 type = "vector d-form lo_sum #2";
11118 }
11119
11120 else
11121 {
11122 type = "address";
11123 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11124 }
11125 break;
11126
11127 /* TOC references look like offsettable memory. */
11128 case UNSPEC:
11129 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11130 {
11131 fail_msg = "bad UNSPEC";
11132 extra_cost = -1;
11133 }
11134
11135 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11136 {
11137 extra_cost = -1;
11138 type = "vector d-form lo_sum #3";
11139 }
11140
11141 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11142 {
11143 extra_cost = 1;
11144 type = "toc reference";
11145 }
11146 break;
11147
11148 default:
11149 {
11150 fail_msg = "bad address";
11151 extra_cost = -1;
11152 }
11153 }
11154
11155 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11156 {
11157 if (extra_cost < 0)
11158 fprintf (stderr,
11159 "rs6000_secondary_reload_memory error: mode = %s, "
11160 "class = %s, addr_mask = '%s', %s\n",
11161 GET_MODE_NAME (mode),
11162 reg_class_names[rclass],
11163 rs6000_debug_addr_mask (addr_mask, false),
11164 (fail_msg != NULL) ? fail_msg : "<bad address>");
11165
11166 else
11167 fprintf (stderr,
11168 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11169 "addr_mask = '%s', extra cost = %d, %s\n",
11170 GET_MODE_NAME (mode),
11171 reg_class_names[rclass],
11172 rs6000_debug_addr_mask (addr_mask, false),
11173 extra_cost,
11174 (type) ? type : "<none>");
11175
11176 debug_rtx (addr);
11177 }
11178
11179 return extra_cost;
11180 }
11181
11182 /* Helper function for rs6000_secondary_reload to return true if a move to a
11183 different register class is really a simple move. */
11184
11185 static bool
11186 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11187 enum rs6000_reg_type from_type,
11188 machine_mode mode)
11189 {
11190 int size = GET_MODE_SIZE (mode);
11191
11192 /* Add support for various direct moves available. In this function, we only
11193 look at cases where we don't need any extra registers, and one or more
11194 simple move insns are issued. Originally small integers are not allowed
11195 in FPR/VSX registers. Single precision binary floating is not a simple
11196 move because we need to convert to the single precision memory layout.
11197 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11198 need special direct move handling, which we do not support yet. */
11199 if (TARGET_DIRECT_MOVE
11200 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11201 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11202 {
11203 if (TARGET_POWERPC64)
11204 {
11205 /* ISA 2.07: MTVSRD or MFVSRD. */
11206 if (size == 8)
11207 return true;
11208
11209 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11210 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11211 return true;
11212 }
11213
11214 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11215 if (TARGET_P8_VECTOR)
11216 {
11217 if (mode == SImode)
11218 return true;
11219
11220 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11221 return true;
11222 }
11223
11224 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11225 if (mode == SDmode)
11226 return true;
11227 }
11228
11229 /* Move to/from SPR. */
11230 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11231 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11232 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11233 return true;
11234
11235 return false;
11236 }
11237
11238 /* Direct move helper function for rs6000_secondary_reload, handle all of the
11239 special direct moves that involve allocating an extra register, return the
11240 insn code of the helper function if there is such a function or
11241 CODE_FOR_nothing if not. */
11242
11243 static bool
11244 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11245 enum rs6000_reg_type from_type,
11246 machine_mode mode,
11247 secondary_reload_info *sri,
11248 bool altivec_p)
11249 {
11250 bool ret = false;
11251 enum insn_code icode = CODE_FOR_nothing;
11252 int cost = 0;
11253 int size = GET_MODE_SIZE (mode);
11254
11255 if (TARGET_POWERPC64 && size == 16)
11256 {
11257 /* Handle moving 128-bit values from GPRs to VSX registers on
11258 ISA 2.07 (power8, power9) when running in 64-bit mode using
11259 XXPERMDI to glue the two 64-bit values back together. */
11260 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11261 {
11262 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11263 icode = reg_addr[mode].reload_vsx_gpr;
11264 }
11265
11266 /* Handle moving 128-bit values from VSX registers to GPRs on
11267 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11268 bottom 64-bit value. */
11269 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11270 {
11271 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11272 icode = reg_addr[mode].reload_gpr_vsx;
11273 }
11274 }
11275
11276 else if (TARGET_POWERPC64 && mode == SFmode)
11277 {
11278 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11279 {
11280 cost = 3; /* xscvdpspn, mfvsrd, and. */
11281 icode = reg_addr[mode].reload_gpr_vsx;
11282 }
11283
11284 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11285 {
11286 cost = 2; /* mtvsrwz, xscvspdpn. */
11287 icode = reg_addr[mode].reload_vsx_gpr;
11288 }
11289 }
11290
11291 else if (!TARGET_POWERPC64 && size == 8)
11292 {
11293 /* Handle moving 64-bit values from GPRs to floating point registers on
11294 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11295 32-bit values back together. Altivec register classes must be handled
11296 specially since a different instruction is used, and the secondary
11297 reload support requires a single instruction class in the scratch
11298 register constraint. However, right now TFmode is not allowed in
11299 Altivec registers, so the pattern will never match. */
11300 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11301 {
11302 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11303 icode = reg_addr[mode].reload_fpr_gpr;
11304 }
11305 }
11306
11307 if (icode != CODE_FOR_nothing)
11308 {
11309 ret = true;
11310 if (sri)
11311 {
11312 sri->icode = icode;
11313 sri->extra_cost = cost;
11314 }
11315 }
11316
11317 return ret;
11318 }
11319
11320 /* Return whether a move between two register classes can be done either
11321 directly (simple move) or via a pattern that uses a single extra temporary
11322 (using ISA 2.07's direct move in this case). */
11323
11324 static bool
11325 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11326 enum rs6000_reg_type from_type,
11327 machine_mode mode,
11328 secondary_reload_info *sri,
11329 bool altivec_p)
11330 {
11331 /* Fall back to load/store reloads if either type is not a register. */
11332 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11333 return false;
11334
11335 /* If we haven't allocated registers yet, assume the move can be done for the
11336 standard register types. */
11337 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11338 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11339 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11340 return true;
11341
11342 /* A move within the same set of registers is a simple move for
11343 non-specialized registers. */
11344 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11345 return true;
11346
11347 /* Check whether a simple move can be done directly. */
11348 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11349 {
11350 if (sri)
11351 {
11352 sri->icode = CODE_FOR_nothing;
11353 sri->extra_cost = 0;
11354 }
11355 return true;
11356 }
11357
11358 /* Now check if we can do it in a few steps. */
11359 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11360 altivec_p);
11361 }
11362
11363 /* Inform reload about cases where moving X with a mode MODE to a register in
11364 RCLASS requires an extra scratch or immediate register. Return the class
11365 needed for the immediate register.
11366
11367 For VSX and Altivec, we may need a register to convert sp+offset into
11368 reg+sp.
11369
11370 For misaligned 64-bit gpr loads and stores we need a register to
11371 convert an offset address to indirect. */
11372
11373 static reg_class_t
11374 rs6000_secondary_reload (bool in_p,
11375 rtx x,
11376 reg_class_t rclass_i,
11377 machine_mode mode,
11378 secondary_reload_info *sri)
11379 {
11380 enum reg_class rclass = (enum reg_class) rclass_i;
11381 reg_class_t ret = ALL_REGS;
11382 enum insn_code icode;
11383 bool default_p = false;
11384 bool done_p = false;
11385
11386 /* Allow subreg of memory before/during reload. */
11387 bool memory_p = (MEM_P (x)
11388 || (!reload_completed && SUBREG_P (x)
11389 && MEM_P (SUBREG_REG (x))));
11390
11391 sri->icode = CODE_FOR_nothing;
11392 sri->t_icode = CODE_FOR_nothing;
11393 sri->extra_cost = 0;
11394 icode = ((in_p)
11395 ? reg_addr[mode].reload_load
11396 : reg_addr[mode].reload_store);
11397
11398 if (REG_P (x) || register_operand (x, mode))
11399 {
11400 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11401 bool altivec_p = (rclass == ALTIVEC_REGS);
11402 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11403
11404 if (!in_p)
11405 std::swap (to_type, from_type);
11406
11407 /* Can we do a direct move of some sort? */
11408 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11409 altivec_p))
11410 {
11411 icode = (enum insn_code)sri->icode;
11412 default_p = false;
11413 done_p = true;
11414 ret = NO_REGS;
11415 }
11416 }
11417
11418 /* Make sure 0.0 is not reloaded or forced into memory. */
11419 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11420 {
11421 ret = NO_REGS;
11422 default_p = false;
11423 done_p = true;
11424 }
11425
11426 /* If this is a scalar floating point value and we want to load it into the
11427 traditional Altivec registers, do it with a move through a traditional floating
11428 point register, unless we have D-form addressing. Also make sure that
11429 non-zero constants use a FPR. */
11430 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11431 && !mode_supports_vmx_dform (mode)
11432 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11433 && (memory_p || CONST_DOUBLE_P (x)))
11434 {
11435 ret = FLOAT_REGS;
11436 default_p = false;
11437 done_p = true;
11438 }
11439
11440 /* Handle reload of load/stores if we have reload helper functions. */
11441 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11442 {
11443 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11444 mode);
11445
11446 if (extra_cost >= 0)
11447 {
11448 done_p = true;
11449 ret = NO_REGS;
11450 if (extra_cost > 0)
11451 {
11452 sri->extra_cost = extra_cost;
11453 sri->icode = icode;
11454 }
11455 }
11456 }
11457
11458 /* Handle unaligned loads and stores of integer registers. */
11459 if (!done_p && TARGET_POWERPC64
11460 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11461 && memory_p
11462 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11463 {
11464 rtx addr = XEXP (x, 0);
11465 rtx off = address_offset (addr);
11466
11467 if (off != NULL_RTX)
11468 {
11469 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11470 unsigned HOST_WIDE_INT offset = INTVAL (off);
11471
11472 /* We need a secondary reload when our legitimate_address_p
11473 says the address is good (as otherwise the entire address
11474 will be reloaded), and the offset is not a multiple of
11475 four or we have an address wrap. Address wrap will only
11476 occur for LO_SUMs since legitimate_offset_address_p
11477 rejects addresses for 16-byte mems that will wrap. */
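	  /* The XOR-with-0x8000 idiom below (an illustrative note, not an
	     original comment) is one unsigned compare that catches offsets
	     whose sign-extended low 16 bits lie in [0x8000 - extra, 0x7fff],
	     i.e. accesses whose last word would wrap. */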
11478 if (GET_CODE (addr) == LO_SUM
11479 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11480 && ((offset & 3) != 0
11481 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11482 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11483 && (offset & 3) != 0))
11484 {
11485 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11486 if (in_p)
11487 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11488 : CODE_FOR_reload_di_load);
11489 else
11490 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11491 : CODE_FOR_reload_di_store);
11492 sri->extra_cost = 2;
11493 ret = NO_REGS;
11494 done_p = true;
11495 }
11496 else
11497 default_p = true;
11498 }
11499 else
11500 default_p = true;
11501 }
11502
11503 if (!done_p && !TARGET_POWERPC64
11504 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11505 && memory_p
11506 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11507 {
11508 rtx addr = XEXP (x, 0);
11509 rtx off = address_offset (addr);
11510
11511 if (off != NULL_RTX)
11512 {
11513 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11514 unsigned HOST_WIDE_INT offset = INTVAL (off);
11515
11516 /* We need a secondary reload when our legitimate_address_p
11517 says the address is good (as otherwise the entire address
11518 will be reloaded), and we have a wrap.
11519
11520 legitimate_lo_sum_address_p allows LO_SUM addresses to
11521 have any offset so test for wrap in the low 16 bits.
11522
11523 legitimate_offset_address_p checks for the range
11524 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11525 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11526 [0x7ff4,0x7fff] respectively, so test for the
11527 intersection of these ranges, [0x7ffc,0x7fff] and
11528 [0x7ff4,0x7ff7] respectively.
11529
11530 Note that the address we see here may have been
11531 manipulated by legitimize_reload_address. */
11532 if (GET_CODE (addr) == LO_SUM
11533 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11534 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11535 {
11536 if (in_p)
11537 sri->icode = CODE_FOR_reload_si_load;
11538 else
11539 sri->icode = CODE_FOR_reload_si_store;
11540 sri->extra_cost = 2;
11541 ret = NO_REGS;
11542 done_p = true;
11543 }
11544 else
11545 default_p = true;
11546 }
11547 else
11548 default_p = true;
11549 }
11550
11551 if (!done_p)
11552 default_p = true;
11553
11554 if (default_p)
11555 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
11556
11557 gcc_assert (ret != ALL_REGS);
11558
11559 if (TARGET_DEBUG_ADDR)
11560 {
11561 fprintf (stderr,
11562 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11563 "mode = %s",
11564 reg_class_names[ret],
11565 in_p ? "true" : "false",
11566 reg_class_names[rclass],
11567 GET_MODE_NAME (mode));
11568
11569 if (reload_completed)
11570 fputs (", after reload", stderr);
11571
11572 if (!done_p)
11573 fputs (", done_p not set", stderr);
11574
11575 if (default_p)
11576 fputs (", default secondary reload", stderr);
11577
11578 if (sri->icode != CODE_FOR_nothing)
11579 fprintf (stderr, ", reload func = %s, extra cost = %d",
11580 insn_data[sri->icode].name, sri->extra_cost);
11581
11582 else if (sri->extra_cost > 0)
11583 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11584
11585 fputs ("\n", stderr);
11586 debug_rtx (x);
11587 }
11588
11589 return ret;
11590 }
11591
11592 /* Better tracing for rs6000_secondary_reload_inner. */
11593
11594 static void
11595 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11596 bool store_p)
11597 {
11598 rtx set, clobber;
11599
11600 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11601
11602 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11603 store_p ? "store" : "load");
11604
11605 if (store_p)
11606 set = gen_rtx_SET (mem, reg);
11607 else
11608 set = gen_rtx_SET (reg, mem);
11609
11610 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11611 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11612 }
11613
11614 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
11615 ATTRIBUTE_NORETURN;
11616
11617 static void
11618 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
11619 bool store_p)
11620 {
11621 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
11622 gcc_unreachable ();
11623 }
11624
11625 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11626 reload helper functions. These were identified in
11627 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11628 reload, it calls the insns:
11629 reload_<RELOAD:mode>_<P:mptrsize>_store
11630 reload_<RELOAD:mode>_<P:mptrsize>_load
11631
11632 which in turn calls this function, to do whatever is necessary to create
11633 valid addresses. */
11634
11635 void
11636 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
11637 {
11638 int regno = true_regnum (reg);
11639 machine_mode mode = GET_MODE (reg);
11640 addr_mask_type addr_mask;
11641 rtx addr;
11642 rtx new_addr;
11643 rtx op_reg, op0, op1;
11644 rtx and_op;
11645 rtx cc_clobber;
11646 rtvec rv;
11647
11648 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
11649 || !base_reg_operand (scratch, GET_MODE (scratch)))
11650 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11651
11652 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
11653 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11654
11655 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
11656 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11657
11658 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
11659 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11660
11661 else
11662 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11663
11664 /* Make sure the mode is valid in this register class. */
11665 if ((addr_mask & RELOAD_REG_VALID) == 0)
11666 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11667
11668 if (TARGET_DEBUG_ADDR)
11669 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
11670
11671 new_addr = addr = XEXP (mem, 0);
11672 switch (GET_CODE (addr))
11673 {
11674 /* Does the register class support auto update forms for this mode? If
11675 not, do the update now. We don't need a scratch register, since the
11676 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
11677 case PRE_INC:
11678 case PRE_DEC:
11679 op_reg = XEXP (addr, 0);
11680 if (!base_reg_operand (op_reg, Pmode))
11681 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11682
11683 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11684 {
11685 int delta = GET_MODE_SIZE (mode);
11686 if (GET_CODE (addr) == PRE_DEC)
11687 delta = -delta;
11688 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
11689 new_addr = op_reg;
11690 }
11691 break;
11692
11693 case PRE_MODIFY:
11694 op0 = XEXP (addr, 0);
11695 op1 = XEXP (addr, 1);
11696 if (!base_reg_operand (op0, Pmode)
11697 || GET_CODE (op1) != PLUS
11698 || !rtx_equal_p (op0, XEXP (op1, 0)))
11699 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11700
11701 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11702 {
11703 emit_insn (gen_rtx_SET (op0, op1));
11704 new_addr = op0;
11705 }
11706 break;
11707
11708 /* Do we need to simulate AND -16 to clear the bottom address bits used
11709 in VMX load/stores? */
11710 case AND:
11711 op0 = XEXP (addr, 0);
11712 op1 = XEXP (addr, 1);
11713 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
11714 {
11715 if (REG_P (op0) || SUBREG_P (op0))
11716 op_reg = op0;
11717
11718 else if (GET_CODE (op1) == PLUS)
11719 {
11720 emit_insn (gen_rtx_SET (scratch, op1));
11721 op_reg = scratch;
11722 }
11723
11724 else
11725 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11726
11727 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
11728 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
11729 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
11730 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
11731 new_addr = scratch;
11732 }
11733 break;
11734
11735 /* If this is an indirect address, make sure it is a base register. */
11736 case REG:
11737 case SUBREG:
11738 if (!base_reg_operand (addr, GET_MODE (addr)))
11739 {
11740 emit_insn (gen_rtx_SET (scratch, addr));
11741 new_addr = scratch;
11742 }
11743 break;
11744
11745 /* If this is an indexed address, make sure the register class can handle
11746 indexed addresses for this mode. */
11747 case PLUS:
11748 op0 = XEXP (addr, 0);
11749 op1 = XEXP (addr, 1);
11750 if (!base_reg_operand (op0, Pmode))
11751 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11752
11753 else if (int_reg_operand (op1, Pmode))
11754 {
11755 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11756 {
11757 emit_insn (gen_rtx_SET (scratch, addr));
11758 new_addr = scratch;
11759 }
11760 }
11761
11762 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
11763 {
11764 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
11765 || !quad_address_p (addr, mode, false))
11766 {
11767 emit_insn (gen_rtx_SET (scratch, addr));
11768 new_addr = scratch;
11769 }
11770 }
11771
11772 /* Make sure the register class can handle offset addresses. */
11773 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11774 {
11775 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11776 {
11777 emit_insn (gen_rtx_SET (scratch, addr));
11778 new_addr = scratch;
11779 }
11780 }
11781
11782 else
11783 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11784
11785 break;
11786
11787 case LO_SUM:
11788 op0 = XEXP (addr, 0);
11789 op1 = XEXP (addr, 1);
11790 if (!base_reg_operand (op0, Pmode))
11791 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11792
11793 else if (int_reg_operand (op1, Pmode))
11794 {
11795 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11796 {
11797 emit_insn (gen_rtx_SET (scratch, addr));
11798 new_addr = scratch;
11799 }
11800 }
11801
11802 /* Quad offsets are restricted and can't handle normal addresses. */
11803 else if (mode_supports_dq_form (mode))
11804 {
11805 emit_insn (gen_rtx_SET (scratch, addr));
11806 new_addr = scratch;
11807 }
11808
11809 /* Make sure the register class can handle offset addresses. */
11810 else if (legitimate_lo_sum_address_p (mode, addr, false))
11811 {
11812 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11813 {
11814 emit_insn (gen_rtx_SET (scratch, addr));
11815 new_addr = scratch;
11816 }
11817 }
11818
11819 else
11820 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11821
11822 break;
11823
11824 case SYMBOL_REF:
11825 case CONST:
11826 case LABEL_REF:
11827 rs6000_emit_move (scratch, addr, Pmode);
11828 new_addr = scratch;
11829 break;
11830
11831 default:
11832 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11833 }
11834
11835 /* Adjust the address if it changed. */
11836 if (addr != new_addr)
11837 {
11838 mem = replace_equiv_address_nv (mem, new_addr);
11839 if (TARGET_DEBUG_ADDR)
11840 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
11841 }
11842
11843 /* Now create the move. */
11844 if (store_p)
11845 emit_insn (gen_rtx_SET (mem, reg));
11846 else
11847 emit_insn (gen_rtx_SET (reg, mem));
11848
11849 return;
11850 }
11851
11852 /* Convert reloads involving 64-bit gprs and misaligned offset
11853 addressing, or multiple 32-bit gprs and offsets that are too large,
11854 to use indirect addressing. */
11855
11856 void
11857 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
11858 {
11859 int regno = true_regnum (reg);
11860 enum reg_class rclass;
11861 rtx addr;
11862 rtx scratch_or_premodify = scratch;
11863
11864 if (TARGET_DEBUG_ADDR)
11865 {
11866 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
11867 store_p ? "store" : "load");
11868 fprintf (stderr, "reg:\n");
11869 debug_rtx (reg);
11870 fprintf (stderr, "mem:\n");
11871 debug_rtx (mem);
11872 fprintf (stderr, "scratch:\n");
11873 debug_rtx (scratch);
11874 }
11875
11876 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
11877 gcc_assert (MEM_P (mem));
11878 rclass = REGNO_REG_CLASS (regno);
11879 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
11880 addr = XEXP (mem, 0);
11881
11882 if (GET_CODE (addr) == PRE_MODIFY)
11883 {
11884 gcc_assert (REG_P (XEXP (addr, 0))
11885 && GET_CODE (XEXP (addr, 1)) == PLUS
11886 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
11887 scratch_or_premodify = XEXP (addr, 0);
11888 addr = XEXP (addr, 1);
11889 }
11890 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
11891
11892 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
11893
11894 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
11895
11896 /* Now create the move. */
11897 if (store_p)
11898 emit_insn (gen_rtx_SET (mem, reg));
11899 else
11900 emit_insn (gen_rtx_SET (reg, mem));
11901
11902 return;
11903 }
11904
11905 /* Given an rtx X being reloaded into a reg required to be
11906 in class CLASS, return the class of reg to actually use.
11907 In general this is just CLASS; but on some machines
11908 in some cases it is preferable to use a more restrictive class.
11909
11910 On the RS/6000, we have to return NO_REGS when we want to reload a
11911 floating-point CONST_DOUBLE to force it to be copied to memory.
11912
11913 We also don't want to reload integer values into floating-point
11914 registers if we can at all help it. In fact, this can
11915 cause reload to die, if it tries to generate a reload of CTR
11916 into a FP register and discovers it doesn't have the memory location
11917 required.
11918
11919 ??? Would it be a good idea to have reload do the converse, that is
11920 try to reload floating modes into FP registers if possible?
11921 */
11922
11923 static enum reg_class
11924 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
11925 {
11926 machine_mode mode = GET_MODE (x);
11927 bool is_constant = CONSTANT_P (x);
11928
11929 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
11930 reload class for it. */
11931 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
11932 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
11933 return NO_REGS;
11934
11935 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
11936 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
11937 return NO_REGS;
11938
11939 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
11940 the reloading of address expressions using PLUS into floating point
11941 registers. */
11942 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
11943 {
11944 if (is_constant)
11945 {
11946 /* Zero is always allowed in all VSX registers. */
11947 if (x == CONST0_RTX (mode))
11948 return rclass;
11949
11950 /* If this is a vector constant that can be formed with a few Altivec
11951 instructions, we want altivec registers. */
11952 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
11953 return ALTIVEC_REGS;
11954
11955 /* If this is an integer constant that can easily be loaded into
11956 vector registers, allow it. */
11957 if (CONST_INT_P (x))
11958 {
11959 HOST_WIDE_INT value = INTVAL (x);
11960
11961 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
11962 2.06 can generate it in the Altivec registers with
11963 VSPLTI<x>. */
11964 if (value == -1)
11965 {
11966 if (TARGET_P8_VECTOR)
11967 return rclass;
11968 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
11969 return ALTIVEC_REGS;
11970 else
11971 return NO_REGS;
11972 }
11973
11974 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
11975 a sign extend in the Altivec registers. */
11976 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
11977 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
11978 return ALTIVEC_REGS;
11979 }
11980
11981 /* Force constant to memory. */
11982 return NO_REGS;
11983 }
11984
11985 /* D-form addressing can easily reload the value. */
11986 if (mode_supports_vmx_dform (mode)
11987 || mode_supports_dq_form (mode))
11988 return rclass;
11989
11990 /* If this is a scalar floating point value and we don't have D-form
11991 addressing, prefer the traditional floating point registers so that we
11992 can use D-form (register+offset) addressing. */
11993 if (rclass == VSX_REGS
11994 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
11995 return FLOAT_REGS;
11996
11997 /* Prefer the Altivec registers if Altivec is handling the vector
11998 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
11999 loads. */
12000 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12001 || mode == V1TImode)
12002 return ALTIVEC_REGS;
12003
12004 return rclass;
12005 }
12006
12007 if (is_constant || GET_CODE (x) == PLUS)
12008 {
12009 if (reg_class_subset_p (GENERAL_REGS, rclass))
12010 return GENERAL_REGS;
12011 if (reg_class_subset_p (BASE_REGS, rclass))
12012 return BASE_REGS;
12013 return NO_REGS;
12014 }
12015
12016 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
12017 return GENERAL_REGS;
12018
12019 return rclass;
12020 }
12021
12022 /* Debug version of rs6000_preferred_reload_class. */
12023 static enum reg_class
12024 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12025 {
12026 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12027
12028 fprintf (stderr,
12029 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12030 "mode = %s, x:\n",
12031 reg_class_names[ret], reg_class_names[rclass],
12032 GET_MODE_NAME (GET_MODE (x)));
12033 debug_rtx (x);
12034
12035 return ret;
12036 }
12037
12038 /* If we are copying between FP or AltiVec registers and anything else, we need
12039 a memory location. The exception is when we are targeting ppc64 and the
12040 direct moves between the fpr and gpr registers are available. Also, under VSX, you
12041 can copy vector registers from the FP register set to the Altivec register
12042 set and vice versa. */
12043
12044 static bool
12045 rs6000_secondary_memory_needed (machine_mode mode,
12046 reg_class_t from_class,
12047 reg_class_t to_class)
12048 {
12049 enum rs6000_reg_type from_type, to_type;
12050 bool altivec_p = ((from_class == ALTIVEC_REGS)
12051 || (to_class == ALTIVEC_REGS));
12052
12053 /* If a simple/direct move is available, we don't need secondary memory. */
12054 from_type = reg_class_to_reg_type[(int)from_class];
12055 to_type = reg_class_to_reg_type[(int)to_class];
12056
12057 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12058 (secondary_reload_info *)0, altivec_p))
12059 return false;
12060
12061 /* If we have a floating point or vector register class, we need to use
12062 memory to transfer the data. */
12063 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12064 return true;
12065
12066 return false;
12067 }
12068
12069 /* Debug version of rs6000_secondary_memory_needed. */
12070 static bool
12071 rs6000_debug_secondary_memory_needed (machine_mode mode,
12072 reg_class_t from_class,
12073 reg_class_t to_class)
12074 {
12075 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12076
12077 fprintf (stderr,
12078 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12079 "to_class = %s, mode = %s\n",
12080 ret ? "true" : "false",
12081 reg_class_names[from_class],
12082 reg_class_names[to_class],
12083 GET_MODE_NAME (mode));
12084
12085 return ret;
12086 }
12087
12088 /* Return the register class of a scratch register needed to copy IN into
12089 or out of a register in RCLASS in MODE. If it can be done directly,
12090 NO_REGS is returned. */
12091
12092 static enum reg_class
12093 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12094 rtx in)
12095 {
12096 int regno;
12097
12098 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12099 #if TARGET_MACHO
12100 && MACHOPIC_INDIRECT
12101 #endif
12102 ))
12103 {
12104 /* We cannot copy a symbolic operand directly into anything
12105 other than BASE_REGS for TARGET_ELF. So indicate that a
12106 register from BASE_REGS is needed as an intermediate
12107 register.
12108
12109 On Darwin, pic addresses require a load from memory, which
12110 needs a base register. */
12111 if (rclass != BASE_REGS
12112 && (SYMBOL_REF_P (in)
12113 || GET_CODE (in) == HIGH
12114 || GET_CODE (in) == LABEL_REF
12115 || GET_CODE (in) == CONST))
12116 return BASE_REGS;
12117 }
12118
12119 if (REG_P (in))
12120 {
12121 regno = REGNO (in);
12122 if (!HARD_REGISTER_NUM_P (regno))
12123 {
12124 regno = true_regnum (in);
12125 if (!HARD_REGISTER_NUM_P (regno))
12126 regno = -1;
12127 }
12128 }
12129 else if (SUBREG_P (in))
12130 {
12131 regno = true_regnum (in);
12132 if (!HARD_REGISTER_NUM_P (regno))
12133 regno = -1;
12134 }
12135 else
12136 regno = -1;
12137
12138 /* If we have VSX register moves, prefer moving scalar values between
12139 Altivec registers and GPRs by going via an FPR (and then via memory)
12140 instead of reloading the secondary memory address for Altivec moves. */
12141 if (TARGET_VSX
12142 && GET_MODE_SIZE (mode) < 16
12143 && !mode_supports_vmx_dform (mode)
12144 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12145 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12146 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12147 && (regno >= 0 && INT_REGNO_P (regno)))))
12148 return FLOAT_REGS;
12149
12150 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12151 into anything. */
12152 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12153 || (regno >= 0 && INT_REGNO_P (regno)))
12154 return NO_REGS;
12155
12156 /* Constants, memory, and VSX registers can go into VSX registers (both the
12157 traditional floating point and the altivec registers). */
12158 if (rclass == VSX_REGS
12159 && (regno == -1 || VSX_REGNO_P (regno)))
12160 return NO_REGS;
12161
12162 /* Constants, memory, and FP registers can go into FP registers. */
12163 if ((regno == -1 || FP_REGNO_P (regno))
12164 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12165 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12166
12167 /* Memory and AltiVec registers can go into AltiVec registers. */
12168 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12169 && rclass == ALTIVEC_REGS)
12170 return NO_REGS;
12171
12172 /* We can copy among the CR registers. */
12173 if ((rclass == CR_REGS || rclass == CR0_REGS)
12174 && regno >= 0 && CR_REGNO_P (regno))
12175 return NO_REGS;
12176
12177 /* Otherwise, we need GENERAL_REGS. */
12178 return GENERAL_REGS;
12179 }
12180
12181 /* Debug version of rs6000_secondary_reload_class. */
12182 static enum reg_class
12183 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12184 machine_mode mode, rtx in)
12185 {
12186 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12187 fprintf (stderr,
12188 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12189 "mode = %s, input rtx:\n",
12190 reg_class_names[ret], reg_class_names[rclass],
12191 GET_MODE_NAME (mode));
12192 debug_rtx (in);
12193
12194 return ret;
12195 }
12196
12197 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12198
12199 static bool
12200 rs6000_can_change_mode_class (machine_mode from,
12201 machine_mode to,
12202 reg_class_t rclass)
12203 {
12204 unsigned from_size = GET_MODE_SIZE (from);
12205 unsigned to_size = GET_MODE_SIZE (to);
12206
12207 if (from_size != to_size)
12208 {
12209 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12210
12211 if (reg_classes_intersect_p (xclass, rclass))
12212 {
12213 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12214 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12215 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12216 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12217
12218 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12219 single register under VSX because the scalar part of the register
12220 is in the upper 64-bits, and not the lower 64-bits. Types like
12221 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
12222 IEEE floating point can't overlap, and neither can small
12223 values. */
12224
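 /* E.g. a DImode subreg of a TImode value held in a single VSX
 register is rejected below: the 8-byte scalar lives in the high
 half of the 16-byte register, so subreg offsets would be wrong. */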
12225 if (to_float128_vector_p && from_float128_vector_p)
12226 return true;
12227
12228 else if (to_float128_vector_p || from_float128_vector_p)
12229 return false;
12230
12231 /* TDmode in floating-mode registers must always go into a register
12232 pair with the most significant word in the even-numbered register
12233 to match ISA requirements. In little-endian mode, this does not
12234 match subreg numbering, so we cannot allow subregs. */
12235 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12236 return false;
12237
12238 if (from_size < 8 || to_size < 8)
12239 return false;
12240
12241 if (from_size == 8 && (8 * to_nregs) != to_size)
12242 return false;
12243
12244 if (to_size == 8 && (8 * from_nregs) != from_size)
12245 return false;
12246
12247 return true;
12248 }
12249 else
12250 return true;
12251 }
12252
12253 /* Since the VSX register set includes traditional floating point registers
12254 and altivec registers, just check for the size being different instead of
12255 trying to check whether the modes are vector modes. Otherwise it won't
12256 allow say DF and DI to change classes. For types like TFmode and TDmode
12257 that take 2 64-bit registers, rather than a single 128-bit register, don't
12258 allow subregs of those types to other 128 bit types. */
12259 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12260 {
12261 unsigned num_regs = (from_size + 15) / 16;
12262 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12263 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12264 return false;
12265
12266 return (from_size == 8 || from_size == 16);
12267 }
12268
12269 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12270 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12271 return false;
12272
12273 return true;
12274 }
12275
12276 /* Debug version of rs6000_can_change_mode_class. */
12277 static bool
12278 rs6000_debug_can_change_mode_class (machine_mode from,
12279 machine_mode to,
12280 reg_class_t rclass)
12281 {
12282 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12283
12284 fprintf (stderr,
12285 "rs6000_can_change_mode_class, return %s, from = %s, "
12286 "to = %s, rclass = %s\n",
12287 ret ? "true" : "false",
12288 GET_MODE_NAME (from), GET_MODE_NAME (to),
12289 reg_class_names[rclass]);
12290
12291 return ret;
12292 }
12293 \f
12294 /* Return a string to do a move operation of 128 bits of data. */
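 /* A "#" return tells the caller that no single instruction handles the
 move; the insn is split after reload (cf. rs6000_split_128bit_ok_p
 below). */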
12295
12296 const char *
12297 rs6000_output_move_128bit (rtx operands[])
12298 {
12299 rtx dest = operands[0];
12300 rtx src = operands[1];
12301 machine_mode mode = GET_MODE (dest);
12302 int dest_regno;
12303 int src_regno;
12304 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12305 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12306
12307 if (REG_P (dest))
12308 {
12309 dest_regno = REGNO (dest);
12310 dest_gpr_p = INT_REGNO_P (dest_regno);
12311 dest_fp_p = FP_REGNO_P (dest_regno);
12312 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12313 dest_vsx_p = dest_fp_p | dest_vmx_p;
12314 }
12315 else
12316 {
12317 dest_regno = -1;
12318 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12319 }
12320
12321 if (REG_P (src))
12322 {
12323 src_regno = REGNO (src);
12324 src_gpr_p = INT_REGNO_P (src_regno);
12325 src_fp_p = FP_REGNO_P (src_regno);
12326 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12327 src_vsx_p = src_fp_p | src_vmx_p;
12328 }
12329 else
12330 {
12331 src_regno = -1;
12332 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12333 }
12334
12335 /* Register moves. */
12336 if (dest_regno >= 0 && src_regno >= 0)
12337 {
12338 if (dest_gpr_p)
12339 {
12340 if (src_gpr_p)
12341 return "#";
12342
12343 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12344 return (WORDS_BIG_ENDIAN
12345 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12346 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12347
12348 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12349 return "#";
12350 }
12351
12352 else if (TARGET_VSX && dest_vsx_p)
12353 {
12354 if (src_vsx_p)
12355 return "xxlor %x0,%x1,%x1";
12356
12357 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12358 return (WORDS_BIG_ENDIAN
12359 ? "mtvsrdd %x0,%1,%L1"
12360 : "mtvsrdd %x0,%L1,%1");
12361
12362 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12363 return "#";
12364 }
12365
12366 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12367 return "vor %0,%1,%1";
12368
12369 else if (dest_fp_p && src_fp_p)
12370 return "#";
12371 }
12372
12373 /* Loads. */
12374 else if (dest_regno >= 0 && MEM_P (src))
12375 {
12376 if (dest_gpr_p)
12377 {
12378 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12379 return "lq %0,%1";
12380 else
12381 return "#";
12382 }
12383
12384 else if (TARGET_ALTIVEC && dest_vmx_p
12385 && altivec_indexed_or_indirect_operand (src, mode))
12386 return "lvx %0,%y1";
12387
12388 else if (TARGET_VSX && dest_vsx_p)
12389 {
12390 if (mode_supports_dq_form (mode)
12391 && quad_address_p (XEXP (src, 0), mode, true))
12392 return "lxv %x0,%1";
12393
12394 else if (TARGET_P9_VECTOR)
12395 return "lxvx %x0,%y1";
12396
12397 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12398 return "lxvw4x %x0,%y1";
12399
12400 else
12401 return "lxvd2x %x0,%y1";
12402 }
12403
12404 else if (TARGET_ALTIVEC && dest_vmx_p)
12405 return "lvx %0,%y1";
12406
12407 else if (dest_fp_p)
12408 return "#";
12409 }
12410
12411 /* Stores. */
12412 else if (src_regno >= 0 && MEM_P (dest))
12413 {
12414 if (src_gpr_p)
12415 {
12416 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12417 return "stq %1,%0";
12418 else
12419 return "#";
12420 }
12421
12422 else if (TARGET_ALTIVEC && src_vmx_p
12423 && altivec_indexed_or_indirect_operand (dest, mode))
12424 return "stvx %1,%y0";
12425
12426 else if (TARGET_VSX && src_vsx_p)
12427 {
12428 if (mode_supports_dq_form (mode)
12429 && quad_address_p (XEXP (dest, 0), mode, true))
12430 return "stxv %x1,%0";
12431
12432 else if (TARGET_P9_VECTOR)
12433 return "stxvx %x1,%y0";
12434
12435 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12436 return "stxvw4x %x1,%y0";
12437
12438 else
12439 return "stxvd2x %x1,%y0";
12440 }
12441
12442 else if (TARGET_ALTIVEC && src_vmx_p)
12443 return "stvx %1,%y0";
12444
12445 else if (src_fp_p)
12446 return "#";
12447 }
12448
12449 /* Constants. */
12450 else if (dest_regno >= 0
12451 && (CONST_INT_P (src)
12452 || CONST_WIDE_INT_P (src)
12453 || CONST_DOUBLE_P (src)
12454 || GET_CODE (src) == CONST_VECTOR))
12455 {
12456 if (dest_gpr_p)
12457 return "#";
12458
12459 else if ((dest_vmx_p && TARGET_ALTIVEC)
12460 || (dest_vsx_p && TARGET_VSX))
12461 return output_vec_const_move (operands);
12462 }
12463
12464 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
12465 }
12466
12467 /* Validate a 128-bit move. */
12468 bool
12469 rs6000_move_128bit_ok_p (rtx operands[])
12470 {
12471 machine_mode mode = GET_MODE (operands[0]);
12472 return (gpc_reg_operand (operands[0], mode)
12473 || gpc_reg_operand (operands[1], mode));
12474 }
12475
12476 /* Return true if a 128-bit move needs to be split. */
12477 bool
12478 rs6000_split_128bit_ok_p (rtx operands[])
12479 {
12480 if (!reload_completed)
12481 return false;
12482
12483 if (!gpr_or_gpr_p (operands[0], operands[1]))
12484 return false;
12485
12486 if (quad_load_store_p (operands[0], operands[1]))
12487 return false;
12488
12489 return true;
12490 }
12491
12492 \f
12493 /* Given a comparison operation, return the bit number in CCR to test. We
12494 know this is a valid comparison.
12495
12496 SCC_P is 1 if this is for an scc. That means that %D will have been
12497 used instead of %C, so the bits will be in different places.
12498
12499 Return -1 if OP isn't a valid comparison for some reason. */
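 /* As a worked example, a GT compare sitting in CR6 has base_bit
 4 * 6 == 24 and is tested at bit 24 + 1 == 25. */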
12500
12501 int
12502 ccr_bit (rtx op, int scc_p)
12503 {
12504 enum rtx_code code = GET_CODE (op);
12505 machine_mode cc_mode;
12506 int cc_regnum;
12507 int base_bit;
12508 rtx reg;
12509
12510 if (!COMPARISON_P (op))
12511 return -1;
12512
12513 reg = XEXP (op, 0);
12514
12515 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12516 return -1;
12517
12518 cc_mode = GET_MODE (reg);
12519 cc_regnum = REGNO (reg);
12520 base_bit = 4 * (cc_regnum - CR0_REGNO);
12521
12522 validate_condition_mode (code, cc_mode);
12523
12524 /* When generating a sCOND operation, only positive conditions are
12525 allowed. */
12526 if (scc_p)
12527 switch (code)
12528 {
12529 case EQ:
12530 case GT:
12531 case LT:
12532 case UNORDERED:
12533 case GTU:
12534 case LTU:
12535 break;
12536 default:
12537 return -1;
12538 }
12539
12540 switch (code)
12541 {
12542 case NE:
12543 return scc_p ? base_bit + 3 : base_bit + 2;
12544 case EQ:
12545 return base_bit + 2;
12546 case GT: case GTU: case UNLE:
12547 return base_bit + 1;
12548 case LT: case LTU: case UNGE:
12549 return base_bit;
12550 case ORDERED: case UNORDERED:
12551 return base_bit + 3;
12552
12553 case GE: case GEU:
12554 /* If scc, we will have done a cror to put the bit in the
12555 unordered position. So test that bit. For integer, this is ! LT
12556 unless this is an scc insn. */
12557 return scc_p ? base_bit + 3 : base_bit;
12558
12559 case LE: case LEU:
12560 return scc_p ? base_bit + 3 : base_bit + 1;
12561
12562 default:
12563 return -1;
12564 }
12565 }
12566 \f
12567 /* Return the GOT register. */
12568
12569 rtx
12570 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12571 {
12572 /* The second flow pass currently (June 1999) can't update
12573 regs_ever_live without disturbing other parts of the compiler, so
12574 update it here to make the prolog/epilogue code happy. */
12575 if (!can_create_pseudo_p ()
12576 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12577 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12578
12579 crtl->uses_pic_offset_table = 1;
12580
12581 return pic_offset_table_rtx;
12582 }
12583 \f
12584 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12585
12586 /* Write out a function code label. */
12587
12588 void
12589 rs6000_output_function_entry (FILE *file, const char *fname)
12590 {
12591 if (fname[0] != '.')
12592 {
12593 switch (DEFAULT_ABI)
12594 {
12595 default:
12596 gcc_unreachable ();
12597
12598 case ABI_AIX:
12599 if (DOT_SYMBOLS)
12600 putc ('.', file);
12601 else
12602 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12603 break;
12604
12605 case ABI_ELFv2:
12606 case ABI_V4:
12607 case ABI_DARWIN:
12608 break;
12609 }
12610 }
12611
12612 RS6000_OUTPUT_BASENAME (file, fname);
12613 }
12614
12615 /* Print an operand. Recognize special options, documented below. */
12616
12617 #if TARGET_ELF
12618 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12619 only introduced by the linker, when applying the sda21
12620 relocation. */
12621 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12622 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12623 #else
12624 #define SMALL_DATA_RELOC "sda21"
12625 #define SMALL_DATA_REG 0
12626 #endif
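 /* These feed the small-data prints below; e.g. under -msdata=eabi a
 reference is printed as "var@sda21(0)", and the linker substitutes
 the proper base register when it applies the sda21 relocation. */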
12627
12628 void
12629 print_operand (FILE *file, rtx x, int code)
12630 {
12631 int i;
12632 unsigned HOST_WIDE_INT uval;
12633
12634 switch (code)
12635 {
12636 /* %a is output_address. */
12637
12638 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12639 output_operand. */
12640
12641 case 'D':
12642 /* Like 'J' but get to the GT bit only. */
12643 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12644 {
12645 output_operand_lossage ("invalid %%D value");
12646 return;
12647 }
12648
12649 /* Bit 1 is GT bit. */
12650 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12651
12652 /* Add one for shift count in rlinm for scc. */
12653 fprintf (file, "%d", i + 1);
12654 return;
12655
12656 case 'e':
12657 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12658 if (! INT_P (x))
12659 {
12660 output_operand_lossage ("invalid %%e value");
12661 return;
12662 }
12663
12664 uval = INTVAL (x);
12665 if ((uval & 0xffff) == 0 && uval != 0)
12666 putc ('s', file);
12667 return;
12668
12669 case 'E':
12670 /* X is a CR register. Print the number of the EQ bit of the CR. */
12671 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12672 output_operand_lossage ("invalid %%E value");
12673 else
12674 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12675 return;
12676
12677 case 'f':
12678 /* X is a CR register. Print the shift count needed to move it
12679 to the high-order four bits. */
12680 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12681 output_operand_lossage ("invalid %%f value");
12682 else
12683 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12684 return;
12685
12686 case 'F':
12687 /* Similar, but print the count for the rotate in the opposite
12688 direction. */
12689 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12690 output_operand_lossage ("invalid %%F value");
12691 else
12692 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12693 return;
12694
12695 case 'G':
12696 /* X is a constant integer. If it is negative, print "m",
12697 otherwise print "z". This is to make an aze or ame insn. */
12698 if (!CONST_INT_P (x))
12699 output_operand_lossage ("invalid %%G value");
12700 else if (INTVAL (x) >= 0)
12701 putc ('z', file);
12702 else
12703 putc ('m', file);
12704 return;
12705
12706 case 'h':
12707 /* If constant, output low-order five bits. Otherwise, write
12708 normally. */
12709 if (INT_P (x))
12710 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12711 else
12712 print_operand (file, x, 0);
12713 return;
12714
12715 case 'H':
12716 /* If constant, output low-order six bits. Otherwise, write
12717 normally. */
12718 if (INT_P (x))
12719 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12720 else
12721 print_operand (file, x, 0);
12722 return;
12723
12724 case 'I':
12725 /* Print `i' if this is a constant, else nothing. */
12726 if (INT_P (x))
12727 putc ('i', file);
12728 return;
12729
12730 case 'j':
12731 /* Write the bit number in CCR for jump. */
12732 i = ccr_bit (x, 0);
12733 if (i == -1)
12734 output_operand_lossage ("invalid %%j code");
12735 else
12736 fprintf (file, "%d", i);
12737 return;
12738
12739 case 'J':
12740 /* Similar, but add one for shift count in rlinm for scc and pass
12741 scc flag to `ccr_bit'. */
12742 i = ccr_bit (x, 1);
12743 if (i == -1)
12744 output_operand_lossage ("invalid %%J code");
12745 else
12746 /* If we want bit 31, write a shift count of zero, not 32. */
12747 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12748 return;
12749
12750 case 'k':
12751 /* X must be a constant. Write the 1's complement of the
12752 constant. */
12753 if (! INT_P (x))
12754 output_operand_lossage ("invalid %%k value");
12755 else
12756 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12757 return;
12758
12759 case 'K':
12760 /* X must be a symbolic constant on ELF. Write an
12761 expression suitable for an 'addi' that adds in the low 16
12762 bits of the MEM. */
12763 if (GET_CODE (x) == CONST)
12764 {
12765 if (GET_CODE (XEXP (x, 0)) != PLUS
12766 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12767 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12768 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12769 output_operand_lossage ("invalid %%K value");
12770 }
12771 print_operand_address (file, x);
12772 fputs ("@l", file);
12773 return;
12774
12775 /* %l is output_asm_label. */
12776
12777 case 'L':
12778 /* Write second word of DImode or DFmode reference. Works on register
12779 or non-indexed memory only. */
12780 if (REG_P (x))
12781 fputs (reg_names[REGNO (x) + 1], file);
12782 else if (MEM_P (x))
12783 {
12784 machine_mode mode = GET_MODE (x);
12785 /* Handle possible auto-increment. Since it is pre-increment and
12786 we have already done it, we can just use an offset of one word. */
12787 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12788 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12789 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12790 UNITS_PER_WORD));
12791 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12792 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12793 UNITS_PER_WORD));
12794 else
12795 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12796 UNITS_PER_WORD),
12797 0));
12798
12799 if (small_data_operand (x, GET_MODE (x)))
12800 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12801 reg_names[SMALL_DATA_REG]);
12802 }
12803 return;
12804
12805 case 'N': /* Unused */
12806 /* Write the number of elements in the vector times 4. */
12807 if (GET_CODE (x) != PARALLEL)
12808 output_operand_lossage ("invalid %%N value");
12809 else
12810 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12811 return;
12812
12813 case 'O': /* Unused */
12814 /* Similar, but subtract 1 first. */
12815 if (GET_CODE (x) != PARALLEL)
12816 output_operand_lossage ("invalid %%O value");
12817 else
12818 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12819 return;
12820
12821 case 'p':
12822 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12823 if (! INT_P (x)
12824 || INTVAL (x) < 0
12825 || (i = exact_log2 (INTVAL (x))) < 0)
12826 output_operand_lossage ("invalid %%p value");
12827 else
12828 fprintf (file, "%d", i);
12829 return;
12830
12831 case 'P':
12832 /* The operand must be an indirect memory reference. The result
12833 is the register name. */
12834 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12835 || REGNO (XEXP (x, 0)) >= 32)
12836 output_operand_lossage ("invalid %%P value");
12837 else
12838 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12839 return;
12840
12841 case 'q':
12842 /* This outputs the logical code corresponding to a boolean
12843 expression. The expression may have one or both operands
12844 negated (if one, only the first one). For condition register
12845 logical operations, it will also treat the negated
12846 CR codes as NOTs, but not handle NOTs of them. */
12847 {
12848 const char *const *t = 0;
12849 const char *s;
12850 enum rtx_code code = GET_CODE (x);
12851 static const char * const tbl[3][3] = {
12852 { "and", "andc", "nor" },
12853 { "or", "orc", "nand" },
12854 { "xor", "eqv", "xor" } };
12855
12856 if (code == AND)
12857 t = tbl[0];
12858 else if (code == IOR)
12859 t = tbl[1];
12860 else if (code == XOR)
12861 t = tbl[2];
12862 else
12863 output_operand_lossage ("invalid %%q value");
12864
12865 if (GET_CODE (XEXP (x, 0)) != NOT)
12866 s = t[0];
12867 else
12868 {
12869 if (GET_CODE (XEXP (x, 1)) == NOT)
12870 s = t[2];
12871 else
12872 s = t[1];
12873 }
12874
12875 fputs (s, file);
12876 }
12877 return;
12878
12879 case 'Q':
12880 if (! TARGET_MFCRF)
12881 return;
12882 fputc (',', file);
12883 /* FALLTHRU */
12884
12885 case 'R':
12886 /* X is a CR register. Print the mask for `mtcrf'. */
12887 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12888 output_operand_lossage ("invalid %%R value");
12889 else
12890 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12891 return;
12892
12893 case 's':
12894 /* Low 5 bits of 32 - value */
12895 if (! INT_P (x))
12896 output_operand_lossage ("invalid %%s value");
12897 else
12898 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
12899 return;
12900
12901 case 't':
12902 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
12903 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12904 {
12905 output_operand_lossage ("invalid %%t value");
12906 return;
12907 }
12908
12909 /* Bit 3 is OV bit. */
12910 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
12911
12912 /* If we want bit 31, write a shift count of zero, not 32. */
12913 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12914 return;
12915
12916 case 'T':
12917 /* Print the symbolic name of a branch target register. */
12918 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
12919 x = XVECEXP (x, 0, 0);
12920 if (!REG_P (x) || (REGNO (x) != LR_REGNO
12921 && REGNO (x) != CTR_REGNO))
12922 output_operand_lossage ("invalid %%T value");
12923 else if (REGNO (x) == LR_REGNO)
12924 fputs ("lr", file);
12925 else
12926 fputs ("ctr", file);
12927 return;
12928
12929 case 'u':
12930 /* High-order or low-order 16 bits of constant, whichever is non-zero,
12931 for use in unsigned operand. */
12932 if (! INT_P (x))
12933 {
12934 output_operand_lossage ("invalid %%u value");
12935 return;
12936 }
12937
12938 uval = INTVAL (x);
12939 if ((uval & 0xffff) == 0)
12940 uval >>= 16;
12941
12942 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
12943 return;
12944
12945 case 'v':
12946 /* High-order 16 bits of constant for use in signed operand. */
12947 if (! INT_P (x))
12948 output_operand_lossage ("invalid %%v value");
12949 else
12950 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
12951 (INTVAL (x) >> 16) & 0xffff);
12952 return;
12953
12954 case 'U':
12955 /* Print `u' if this has an auto-increment or auto-decrement. */
12956 if (MEM_P (x)
12957 && (GET_CODE (XEXP (x, 0)) == PRE_INC
12958 || GET_CODE (XEXP (x, 0)) == PRE_DEC
12959 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
12960 putc ('u', file);
12961 return;
12962
12963 case 'V':
12964 /* Print the trap code for this operand. */
12965 switch (GET_CODE (x))
12966 {
12967 case EQ:
12968 fputs ("eq", file); /* 4 */
12969 break;
12970 case NE:
12971 fputs ("ne", file); /* 24 */
12972 break;
12973 case LT:
12974 fputs ("lt", file); /* 16 */
12975 break;
12976 case LE:
12977 fputs ("le", file); /* 20 */
12978 break;
12979 case GT:
12980 fputs ("gt", file); /* 8 */
12981 break;
12982 case GE:
12983 fputs ("ge", file); /* 12 */
12984 break;
12985 case LTU:
12986 fputs ("llt", file); /* 2 */
12987 break;
12988 case LEU:
12989 fputs ("lle", file); /* 6 */
12990 break;
12991 case GTU:
12992 fputs ("lgt", file); /* 1 */
12993 break;
12994 case GEU:
12995 fputs ("lge", file); /* 5 */
12996 break;
12997 default:
12998 output_operand_lossage ("invalid %%V value");
12999 }
13000 break;
13001
13002 case 'w':
13003 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13004 normally. */
13005 if (INT_P (x))
13006 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13007 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13008 else
13009 print_operand (file, x, 0);
13010 return;
13011
13012 case 'x':
13013 /* X is a FPR or Altivec register used in a VSX context. */
13014 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13015 output_operand_lossage ("invalid %%x value");
13016 else
13017 {
13018 int reg = REGNO (x);
13019 int vsx_reg = (FP_REGNO_P (reg)
13020 ? reg - 32
13021 : reg - FIRST_ALTIVEC_REGNO + 32);
13022
13023 #ifdef TARGET_REGNAMES
13024 if (TARGET_REGNAMES)
13025 fprintf (file, "%%vs%d", vsx_reg);
13026 else
13027 #endif
13028 fprintf (file, "%d", vsx_reg);
13029 }
13030 return;
13031
13032 case 'X':
13033 if (MEM_P (x)
13034 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13035 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13036 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13037 putc ('x', file);
13038 return;
13039
13040 case 'Y':
13041 /* Like 'L', for third word of TImode/PTImode. */
13042 if (REG_P (x))
13043 fputs (reg_names[REGNO (x) + 2], file);
13044 else if (MEM_P (x))
13045 {
13046 machine_mode mode = GET_MODE (x);
13047 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13048 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13049 output_address (mode, plus_constant (Pmode,
13050 XEXP (XEXP (x, 0), 0), 8));
13051 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13052 output_address (mode, plus_constant (Pmode,
13053 XEXP (XEXP (x, 0), 0), 8));
13054 else
13055 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13056 if (small_data_operand (x, GET_MODE (x)))
13057 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13058 reg_names[SMALL_DATA_REG]);
13059 }
13060 return;
13061
13062 case 'z':
13063 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13064 x = XVECEXP (x, 0, 1);
13065 /* X is a SYMBOL_REF. Write out the name preceded by a
13066 period and without any trailing data in brackets. Used for function
13067 names. If we are configured for System V (or the embedded ABI) on
13068 the PowerPC, do not emit the period, since those systems do not use
13069 TOCs and the like. */
13070 if (!SYMBOL_REF_P (x))
13071 {
13072 output_operand_lossage ("invalid %%z value");
13073 return;
13074 }
13075
13076 /* For macho, check to see if we need a stub. */
13077 if (TARGET_MACHO)
13078 {
13079 const char *name = XSTR (x, 0);
13080 #if TARGET_MACHO
13081 if (darwin_symbol_stubs
13082 && MACHOPIC_INDIRECT
13083 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13084 name = machopic_indirection_name (x, /*stub_p=*/true);
13085 #endif
13086 assemble_name (file, name);
13087 }
13088 else if (!DOT_SYMBOLS)
13089 assemble_name (file, XSTR (x, 0));
13090 else
13091 rs6000_output_function_entry (file, XSTR (x, 0));
13092 return;
13093
13094 case 'Z':
13095 /* Like 'L', for last word of TImode/PTImode. */
13096 if (REG_P (x))
13097 fputs (reg_names[REGNO (x) + 3], file);
13098 else if (MEM_P (x))
13099 {
13100 machine_mode mode = GET_MODE (x);
13101 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13102 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13103 output_address (mode, plus_constant (Pmode,
13104 XEXP (XEXP (x, 0), 0), 12));
13105 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13106 output_address (mode, plus_constant (Pmode,
13107 XEXP (XEXP (x, 0), 0), 12));
13108 else
13109 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13110 if (small_data_operand (x, GET_MODE (x)))
13111 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13112 reg_names[SMALL_DATA_REG]);
13113 }
13114 return;
13115
13116 /* Print AltiVec memory operand. */
13117 case 'y':
13118 {
13119 rtx tmp;
13120
13121 gcc_assert (MEM_P (x));
13122
13123 tmp = XEXP (x, 0);
13124
13125 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13126 && GET_CODE (tmp) == AND
13127 && CONST_INT_P (XEXP (tmp, 1))
13128 && INTVAL (XEXP (tmp, 1)) == -16)
13129 tmp = XEXP (tmp, 0);
13130 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13131 && GET_CODE (tmp) == PRE_MODIFY)
13132 tmp = XEXP (tmp, 1);
13133 if (REG_P (tmp))
13134 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13135 else
13136 {
13137 if (GET_CODE (tmp) != PLUS
13138 || !REG_P (XEXP (tmp, 0))
13139 || !REG_P (XEXP (tmp, 1)))
13140 {
13141 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13142 break;
13143 }
13144
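 /* Never print r0 as the base (RA) register: in that slot the
 hardware reads the literal value 0 instead of GPR 0, so swap
 it into the index position. */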
13145 if (REGNO (XEXP (tmp, 0)) == 0)
13146 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13147 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13148 else
13149 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13150 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13151 }
13152 break;
13153 }
13154
13155 case 0:
13156 if (REG_P (x))
13157 fprintf (file, "%s", reg_names[REGNO (x)]);
13158 else if (MEM_P (x))
13159 {
13160 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13161 know the width from the mode. */
13162 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13163 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13164 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13165 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13166 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13167 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13168 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13169 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13170 else
13171 output_address (GET_MODE (x), XEXP (x, 0));
13172 }
13173 else if (toc_relative_expr_p (x, false,
13174 &tocrel_base_oac, &tocrel_offset_oac))
13175 /* This hack along with a corresponding hack in
13176 rs6000_output_addr_const_extra arranges to output addends
13177 where the assembler expects to find them. eg.
13178 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13179 without this hack would be output as "x@toc+4". We
13180 want "x+4@toc". */
13181 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13182 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13183 output_addr_const (file, XVECEXP (x, 0, 0));
13184 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13185 output_addr_const (file, XVECEXP (x, 0, 1));
13186 else
13187 output_addr_const (file, x);
13188 return;
13189
13190 case '&':
13191 if (const char *name = get_some_local_dynamic_name ())
13192 assemble_name (file, name);
13193 else
13194 output_operand_lossage ("'%%&' used without any "
13195 "local dynamic TLS references");
13196 return;
13197
13198 default:
13199 output_operand_lossage ("invalid %%xn code");
13200 }
13201 }
13202 \f
13203 /* Print the address of an operand. */
13204
13205 void
13206 print_operand_address (FILE *file, rtx x)
13207 {
13208 if (REG_P (x))
13209 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13210
13211 /* Is it a PC-relative address? */
13212 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13213 {
13214 HOST_WIDE_INT offset;
13215
13216 if (GET_CODE (x) == CONST)
13217 x = XEXP (x, 0);
13218
13219 if (GET_CODE (x) == PLUS)
13220 {
13221 offset = INTVAL (XEXP (x, 1));
13222 x = XEXP (x, 0);
13223 }
13224 else
13225 offset = 0;
13226
13227 output_addr_const (file, x);
13228
13229 if (offset)
13230 fprintf (file, "%+" PRId64, offset);
13231
13232 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13233 fprintf (file, "@got");
13234
13235 fprintf (file, "@pcrel");
13236 }
13237 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13238 || GET_CODE (x) == LABEL_REF)
13239 {
13240 output_addr_const (file, x);
13241 if (small_data_operand (x, GET_MODE (x)))
13242 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13243 reg_names[SMALL_DATA_REG]);
13244 else
13245 gcc_assert (!TARGET_TOC);
13246 }
13247 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13248 && REG_P (XEXP (x, 1)))
13249 {
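 /* As with %y in print_operand, keep r0 out of the first (RA)
 slot of an indexed address. */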
13250 if (REGNO (XEXP (x, 0)) == 0)
13251 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13252 reg_names[ REGNO (XEXP (x, 0)) ]);
13253 else
13254 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13255 reg_names[ REGNO (XEXP (x, 1)) ]);
13256 }
13257 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13258 && CONST_INT_P (XEXP (x, 1)))
13259 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13260 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13261 #if TARGET_MACHO
13262 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13263 && CONSTANT_P (XEXP (x, 1)))
13264 {
13265 fprintf (file, "lo16(");
13266 output_addr_const (file, XEXP (x, 1));
13267 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13268 }
13269 #endif
13270 #if TARGET_ELF
13271 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13272 && CONSTANT_P (XEXP (x, 1)))
13273 {
13274 output_addr_const (file, XEXP (x, 1));
13275 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13276 }
13277 #endif
13278 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13279 {
13280 /* This hack along with a corresponding hack in
13281 rs6000_output_addr_const_extra arranges to output addends
13282 where the assembler expects to find them. eg.
13283 (lo_sum (reg 9)
13284 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13285 without this hack would be output as "x@toc+8@l(9)". We
13286 want "x+8@toc@l(9)". */
13287 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13288 if (GET_CODE (x) == LO_SUM)
13289 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13290 else
13291 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13292 }
13293 else
13294 output_addr_const (file, x);
13295 }
13296 \f
13297 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13298
13299 bool
13300 rs6000_output_addr_const_extra (FILE *file, rtx x)
13301 {
13302 if (GET_CODE (x) == UNSPEC)
13303 switch (XINT (x, 1))
13304 {
13305 case UNSPEC_TOCREL:
13306 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13307 && REG_P (XVECEXP (x, 0, 1))
13308 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13309 output_addr_const (file, XVECEXP (x, 0, 0));
13310 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13311 {
13312 if (INTVAL (tocrel_offset_oac) >= 0)
13313 fprintf (file, "+");
13314 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13315 }
13316 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13317 {
13318 putc ('-', file);
13319 assemble_name (file, toc_label_name);
13320 need_toc_init = 1;
13321 }
13322 else if (TARGET_ELF)
13323 fputs ("@toc", file);
13324 return true;
13325
13326 #if TARGET_MACHO
13327 case UNSPEC_MACHOPIC_OFFSET:
13328 output_addr_const (file, XVECEXP (x, 0, 0));
13329 putc ('-', file);
13330 machopic_output_function_base_name (file);
13331 return true;
13332 #endif
13333 }
13334 return false;
13335 }
13336 \f
13337 /* Target hook for assembling integer objects. The PowerPC version has
13338 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13339 is defined. It also needs to handle DI-mode objects on 64-bit
13340 targets. */
13341
13342 static bool
13343 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13344 {
13345 #ifdef RELOCATABLE_NEEDS_FIXUP
13346 /* Special handling for SI values. */
13347 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13348 {
13349 static int recurse = 0;
13350
13351 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13352 the .fixup section. Since the TOC section is already relocated, we
13353 don't need to mark it here. We used to skip the text section, but it
13354 should never be valid for relocated addresses to be placed in the text
13355 section. */
13356 if (DEFAULT_ABI == ABI_V4
13357 && (TARGET_RELOCATABLE || flag_pic > 1)
13358 && in_section != toc_section
13359 && !recurse
13360 && !CONST_SCALAR_INT_P (x)
13361 && CONSTANT_P (x))
13362 {
13363 char buf[256];
13364
13365 recurse = 1;
13366 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13367 fixuplabelno++;
13368 ASM_OUTPUT_LABEL (asm_out_file, buf);
13369 fprintf (asm_out_file, "\t.long\t(");
13370 output_addr_const (asm_out_file, x);
13371 fprintf (asm_out_file, ")@fixup\n");
13372 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13373 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13374 fprintf (asm_out_file, "\t.long\t");
13375 assemble_name (asm_out_file, buf);
13376 fprintf (asm_out_file, "\n\t.previous\n");
13377 recurse = 0;
13378 return true;
13379 }
13380 /* Remove initial .'s to turn a -mcall-aixdesc function
13381 address into the address of the descriptor, not the function
13382 itself. */
13383 else if (SYMBOL_REF_P (x)
13384 && XSTR (x, 0)[0] == '.'
13385 && DEFAULT_ABI == ABI_AIX)
13386 {
13387 const char *name = XSTR (x, 0);
13388 while (*name == '.')
13389 name++;
13390
13391 fprintf (asm_out_file, "\t.long\t%s\n", name);
13392 return true;
13393 }
13394 }
13395 #endif /* RELOCATABLE_NEEDS_FIXUP */
13396 return default_assemble_integer (x, size, aligned_p);
13397 }
13398
13399 /* Return a template string for assembly to emit when making an
13400 external call. FUNOP is the call mem argument operand number. */
13401
13402 static const char *
13403 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13404 {
13405 /* -Wformat-overflow workaround, without which gcc thinks that %u
13406 might produce 10 digits. */
13407 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13408
13409 char arg[12];
13410 arg[0] = 0;
13411 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13412 {
13413 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13414 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13415 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13416 sprintf (arg, "(%%&@tlsld)");
13417 }
13418
13419 /* The magic 32768 offset here corresponds to the offset of
13420 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13421 char z[11];
13422 sprintf (z, "%%z%u%s", funop,
13423 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13424 ? "+32768" : ""));
13425
13426 static char str[32]; /* 1 spare */
13427 if (rs6000_pcrel_p (cfun))
13428 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13429 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13430 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13431 sibcall ? "" : "\n\tnop");
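 /* The "nop" fills the TOC-restore slot after an AIX/ELFv2 call;
 the linker may replace it with a load that restores r2. */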
13432 else if (DEFAULT_ABI == ABI_V4)
13433 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13434 flag_pic ? "@plt" : "");
13435 #if TARGET_MACHO
13436 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
13437 else if (DEFAULT_ABI == ABI_DARWIN)
13438 {
13439 /* The cookie is in operand func+2. */
13440 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13441 int cookie = INTVAL (operands[funop + 2]);
13442 if (cookie & CALL_LONG)
13443 {
13444 tree funname = get_identifier (XSTR (operands[funop], 0));
13445 tree labelname = get_prev_label (funname);
13446 gcc_checking_assert (labelname && !sibcall);
13447
13448 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13449 instruction will reach 'foo', otherwise link as 'bl L42'".
13450 "L42" should be a 'branch island', that will do a far jump to
13451 'foo'. Branch islands are generated in
13452 macho_branch_islands(). */
13453 sprintf (str, "jbsr %%z%u,%.10s", funop,
13454 IDENTIFIER_POINTER (labelname));
13455 }
13456 else
13457 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13458 after the call. */
13459 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13460 }
13461 #endif
13462 else
13463 gcc_unreachable ();
13464 return str;
13465 }
13466
13467 const char *
13468 rs6000_call_template (rtx *operands, unsigned int funop)
13469 {
13470 return rs6000_call_template_1 (operands, funop, false);
13471 }
13472
13473 const char *
13474 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13475 {
13476 return rs6000_call_template_1 (operands, funop, true);
13477 }
13478
13479 /* As above, for indirect calls. */
13480
13481 static const char *
13482 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13483 bool sibcall)
13484 {
13485 /* -Wformat-overflow workaround, without which gcc thinks that %u
13486 might produce 10 digits. Note that -Wformat-overflow will not
13487 currently warn here for str[], so do not rely on a warning to
13488 ensure str[] is correctly sized. */
13489 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13490
13491 /* Currently, funop is either 0 or 1. The maximum string is always
13492 a !speculate 64-bit __tls_get_addr call.
13493
13494 ABI_ELFv2, pcrel:
13495 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13496 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13497 . 9 crset 2\n\t
13498 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13499 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13500 . 8 beq%T1l-
13501 .---
13502 .142
13503
13504 ABI_AIX:
13505 . 9 ld 2,%3\n\t
13506 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13507 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13508 . 9 crset 2\n\t
13509 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13510 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13511 . 10 beq%T1l-\n\t
13512 . 10 ld 2,%4(1)
13513 .---
13514 .151
13515
13516 ABI_ELFv2:
13517 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13518 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13519 . 9 crset 2\n\t
13520 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13521 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13522 . 10 beq%T1l-\n\t
13523 . 10 ld 2,%3(1)
13524 .---
13525 .142
13526
13527 ABI_V4:
13528 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13529 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13530 . 9 crset 2\n\t
13531 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13532 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13533 . 8 beq%T1l-
13534 .---
13535 .141 */
13536 static char str[160]; /* 8 spare */
13537 char *s = str;
13538 const char *ptrload = TARGET_64BIT ? "d" : "wz";
13539
13540 if (DEFAULT_ABI == ABI_AIX)
13541 s += sprintf (s,
13542 "l%s 2,%%%u\n\t",
13543 ptrload, funop + 2);
13544
13545 /* We don't need the extra code to stop indirect call speculation if
13546 calling via LR. */
13547 bool speculate = (TARGET_MACHO
13548 || rs6000_speculate_indirect_jumps
13549 || (REG_P (operands[funop])
13550 && REGNO (operands[funop]) == LR_REGNO));
13551
13552 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13553 {
13554 const char *rel64 = TARGET_64BIT ? "64" : "";
13555 char tls[29];
13556 tls[0] = 0;
13557 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13558 {
13559 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13560 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13561 rel64, funop + 1);
13562 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13563 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13564 rel64);
13565 }
13566
13567 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13568 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13569 && flag_pic == 2 ? "+32768" : "");
13570 if (!speculate)
13571 {
13572 s += sprintf (s,
13573 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13574 tls, rel64, notoc, funop, addend);
13575 s += sprintf (s, "crset 2\n\t");
13576 }
13577 s += sprintf (s,
13578 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13579 tls, rel64, notoc, funop, addend);
13580 }
13581 else if (!speculate)
13582 s += sprintf (s, "crset 2\n\t");
13583
13584 if (rs6000_pcrel_p (cfun))
13585 {
13586 if (speculate)
13587 sprintf (s, "b%%T%ul", funop);
13588 else
13589 sprintf (s, "beq%%T%ul-", funop);
13590 }
13591 else if (DEFAULT_ABI == ABI_AIX)
13592 {
13593 if (speculate)
13594 sprintf (s,
13595 "b%%T%ul\n\t"
13596 "l%s 2,%%%u(1)",
13597 funop, ptrload, funop + 3);
13598 else
13599 sprintf (s,
13600 "beq%%T%ul-\n\t"
13601 "l%s 2,%%%u(1)",
13602 funop, ptrload, funop + 3);
13603 }
13604 else if (DEFAULT_ABI == ABI_ELFv2)
13605 {
13606 if (speculate)
13607 sprintf (s,
13608 "b%%T%ul\n\t"
13609 "l%s 2,%%%u(1)",
13610 funop, ptrload, funop + 2);
13611 else
13612 sprintf (s,
13613 "beq%%T%ul-\n\t"
13614 "l%s 2,%%%u(1)",
13615 funop, ptrload, funop + 2);
13616 }
13617 else
13618 {
13619 if (speculate)
13620 sprintf (s,
13621 "b%%T%u%s",
13622 funop, sibcall ? "" : "l");
13623 else
13624 sprintf (s,
13625 "beq%%T%u%s-%s",
13626 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13627 }
13628 return str;
13629 }
13630
13631 const char *
13632 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13633 {
13634 return rs6000_indirect_call_template_1 (operands, funop, false);
13635 }
13636
13637 const char *
13638 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13639 {
13640 return rs6000_indirect_call_template_1 (operands, funop, true);
13641 }
13642
13643 #if HAVE_AS_PLTSEQ
13644 /* Output indirect call insns. WHICH identifies the type of sequence. */
13645 const char *
13646 rs6000_pltseq_template (rtx *operands, int which)
13647 {
13648 const char *rel64 = TARGET_64BIT ? "64" : "";
13649 char tls[30];
13650 tls[0] = 0;
13651 if (GET_CODE (operands[3]) == UNSPEC)
13652 {
13653 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13654 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13655 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13656 off, rel64);
13657 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13658 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13659 off, rel64);
13660 }
13661
13662 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13663 static char str[96]; /* 10 spare */
13664 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13665 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13666 && flag_pic == 2 ? "+32768" : "");
13667 switch (which)
13668 {
13669 case RS6000_PLTSEQ_TOCSAVE:
13670 sprintf (str,
13671 "st%s\n\t"
13672 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13673 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13674 tls, rel64);
13675 break;
13676 case RS6000_PLTSEQ_PLT16_HA:
13677 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13678 sprintf (str,
13679 "lis %%0,0\n\t"
13680 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13681 tls, off, rel64);
13682 else
13683 sprintf (str,
13684 "addis %%0,%%1,0\n\t"
13685 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13686 tls, off, rel64, addend);
13687 break;
13688 case RS6000_PLTSEQ_PLT16_LO:
13689 sprintf (str,
13690 "l%s %%0,0(%%1)\n\t"
13691 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13692 TARGET_64BIT ? "d" : "wz",
13693 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
13694 break;
13695 case RS6000_PLTSEQ_MTCTR:
13696 sprintf (str,
13697 "mtctr %%1\n\t"
13698 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13699 tls, rel64, addend);
13700 break;
13701 case RS6000_PLTSEQ_PLT_PCREL34:
13702 sprintf (str,
13703 "pl%s %%0,0(0),1\n\t"
13704 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13705 TARGET_64BIT ? "d" : "wz",
13706 tls, rel64);
13707 break;
13708 default:
13709 gcc_unreachable ();
13710 }
13711 return str;
13712 }
13713 #endif
13714 \f
13715 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13716 /* Emit an assembler directive to set symbol visibility for DECL to
13717 VISIBILITY_TYPE. */
13718
13719 static void
13720 rs6000_assemble_visibility (tree decl, int vis)
13721 {
13722 if (TARGET_XCOFF)
13723 return;
13724
13725 /* Functions need to have their entry point symbol visibility set as
13726 well as their descriptor symbol visibility. */
13727 if (DEFAULT_ABI == ABI_AIX
13728 && DOT_SYMBOLS
13729 && TREE_CODE (decl) == FUNCTION_DECL)
13730 {
13731 static const char * const visibility_types[] = {
13732 NULL, "protected", "hidden", "internal"
13733 };
13734
13735 const char *name, *type;
13736
13737 name = ((* targetm.strip_name_encoding)
13738 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
13739 type = visibility_types[vis];
13740
13741 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
13742 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
13743 }
13744 else
13745 default_assemble_visibility (decl, vis);
13746 }
13747 #endif
13748 \f
13749 enum rtx_code
13750 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13751 {
13752 /* Reversal of FP compares needs care -- an ordered compare
13753 becomes an unordered compare and vice versa. */
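 /* E.g. GE reverses to UNLT rather than LT, so that a NaN operand
 still satisfies exactly one of the original and reversed
 conditions. */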
13754 if (mode == CCFPmode
13755 && (!flag_finite_math_only
13756 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13757 || code == UNEQ || code == LTGT))
13758 return reverse_condition_maybe_unordered (code);
13759 else
13760 return reverse_condition (code);
13761 }
13762
13763 /* Generate a compare for CODE. Return a brand-new rtx that
13764 represents the result of the compare. */
13765
13766 static rtx
13767 rs6000_generate_compare (rtx cmp, machine_mode mode)
13768 {
13769 machine_mode comp_mode;
13770 rtx compare_result;
13771 enum rtx_code code = GET_CODE (cmp);
13772 rtx op0 = XEXP (cmp, 0);
13773 rtx op1 = XEXP (cmp, 1);
13774
13775 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13776 comp_mode = CCmode;
13777 else if (FLOAT_MODE_P (mode))
13778 comp_mode = CCFPmode;
13779 else if (code == GTU || code == LTU
13780 || code == GEU || code == LEU)
13781 comp_mode = CCUNSmode;
13782 else if ((code == EQ || code == NE)
13783 && unsigned_reg_p (op0)
13784 && (unsigned_reg_p (op1)
13785 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13786 /* These are unsigned values, perhaps there will be a later
13787 ordering compare that can be shared with this one. */
13788 comp_mode = CCUNSmode;
13789 else
13790 comp_mode = CCmode;
13791
13792 /* If we have an unsigned compare, make sure we don't have a signed value as
13793 an immediate. */
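 /* A negative constant would be treated as a huge unsigned value,
 which cannot be encoded in the 16-bit immediate field of
 cmplwi/cmpldi. */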
13794 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13795 && INTVAL (op1) < 0)
13796 {
13797 op0 = copy_rtx_if_shared (op0);
13798 op1 = force_reg (GET_MODE (op0), op1);
13799 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
13800 }
13801
13802 /* First, the compare. */
13803 compare_result = gen_reg_rtx (comp_mode);
13804
13805 /* IEEE 128-bit support in VSX registers when we do not have hardware
13806 support. */
13807 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13808 {
13809 rtx libfunc = NULL_RTX;
13810 bool check_nan = false;
13811 rtx dest;
13812
13813 switch (code)
13814 {
13815 case EQ:
13816 case NE:
13817 libfunc = optab_libfunc (eq_optab, mode);
13818 break;
13819
13820 case GT:
13821 case GE:
13822 libfunc = optab_libfunc (ge_optab, mode);
13823 break;
13824
13825 case LT:
13826 case LE:
13827 libfunc = optab_libfunc (le_optab, mode);
13828 break;
13829
13830 case UNORDERED:
13831 case ORDERED:
13832 libfunc = optab_libfunc (unord_optab, mode);
13833 code = (code == UNORDERED) ? NE : EQ;
13834 break;
13835
13836 case UNGE:
13837 case UNGT:
13838 check_nan = true;
13839 libfunc = optab_libfunc (ge_optab, mode);
13840 code = (code == UNGE) ? GE : GT;
13841 break;
13842
13843 case UNLE:
13844 case UNLT:
13845 check_nan = true;
13846 libfunc = optab_libfunc (le_optab, mode);
13847 code = (code == UNLE) ? LE : LT;
13848 break;
13849
13850 case UNEQ:
13851 case LTGT:
13852 check_nan = true;
13853 libfunc = optab_libfunc (eq_optab, mode);
13854 code = (code == UNEQ) ? EQ : NE;
13855 break;
13856
13857 default:
13858 gcc_unreachable ();
13859 }
13860
13861 gcc_assert (libfunc);
13862
13863 if (!check_nan)
13864 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13865 SImode, op0, mode, op1, mode);
13866
13867 /* The library signals an exception for signalling NaNs, so we need to
13868 handle isgreater, etc. by first checking isordered. */
13869 else
13870 {
13871 rtx ne_rtx, normal_dest, unord_dest;
13872 rtx unord_func = optab_libfunc (unord_optab, mode);
13873 rtx join_label = gen_label_rtx ();
13874 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13875 rtx unord_cmp = gen_reg_rtx (comp_mode);
13876
13877
13878 /* Test for either value being a NaN. */
13879 gcc_assert (unord_func);
13880 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13881 SImode, op0, mode, op1, mode);
13882
13883 /* Set the result to 1 if either value is a NaN, and jump to the
13884 join label; the final NE test against zero then yields true. */
13885 dest = gen_reg_rtx (SImode);
13886 emit_move_insn (dest, const1_rtx);
13887 emit_insn (gen_rtx_SET (unord_cmp,
13888 gen_rtx_COMPARE (comp_mode, unord_dest,
13889 const0_rtx)));
13890
13891 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
13892 emit_jump_insn (gen_rtx_SET (pc_rtx,
13893 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
13894 join_ref,
13895 pc_rtx)));
13896
13897 /* Do the normal comparison, knowing that the values are not
13898 NaNs. */
13899 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13900 SImode, op0, mode, op1, mode);
13901
13902 emit_insn (gen_cstoresi4 (dest,
13903 gen_rtx_fmt_ee (code, SImode, normal_dest,
13904 const0_rtx),
13905 normal_dest, const0_rtx));
13906
13907 /* Join NaN and non-NaN paths. Compare dest against 0. */
13908 emit_label (join_label);
13909 code = NE;
13910 }
13911
13912 emit_insn (gen_rtx_SET (compare_result,
13913 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
13914 }
13915
13916 else
13917 {
13918 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
13919 CLOBBERs to match cmptf_internal2 pattern. */
13920 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
13921 && FLOAT128_IBM_P (GET_MODE (op0))
13922 && TARGET_HARD_FLOAT)
13923 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13924 gen_rtvec (10,
13925 gen_rtx_SET (compare_result,
13926 gen_rtx_COMPARE (comp_mode, op0, op1)),
13927 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13928 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13929 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13930 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13931 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13932 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13933 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13934 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13935 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
13936 else if (GET_CODE (op1) == UNSPEC
13937 && XINT (op1, 1) == UNSPEC_SP_TEST)
13938 {
13939 rtx op1b = XVECEXP (op1, 0, 0);
13940 comp_mode = CCEQmode;
13941 compare_result = gen_reg_rtx (CCEQmode);
13942 if (TARGET_64BIT)
13943 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
13944 else
13945 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
13946 }
13947 else
13948 emit_insn (gen_rtx_SET (compare_result,
13949 gen_rtx_COMPARE (comp_mode, op0, op1)));
13950 }
13951
13952 validate_condition_mode (code, GET_MODE (compare_result));
13953
13954 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
13955 }
13956
13957 \f
13958 /* Return the diagnostic message string if the binary operation OP is
13959 not permitted on TYPE1 and TYPE2, NULL otherwise. */
13960
13961 static const char*
13962 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
13963 const_tree type1,
13964 const_tree type2)
13965 {
13966 machine_mode mode1 = TYPE_MODE (type1);
13967 machine_mode mode2 = TYPE_MODE (type2);
13968
13969 /* For complex modes, use the inner type. */
13970 if (COMPLEX_MODE_P (mode1))
13971 mode1 = GET_MODE_INNER (mode1);
13972
13973 if (COMPLEX_MODE_P (mode2))
13974 mode2 = GET_MODE_INNER (mode2);
13975
13976 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
13977 double to intermix unless -mfloat128-convert. */
13978 if (mode1 == mode2)
13979 return NULL;
13980
13981 if (!TARGET_FLOAT128_CVT)
13982 {
13983 if ((mode1 == KFmode && mode2 == IFmode)
13984 || (mode1 == IFmode && mode2 == KFmode))
13985 return N_("__float128 and __ibm128 cannot be used in the same "
13986 "expression");
13987
13988 if (TARGET_IEEEQUAD
13989 && ((mode1 == IFmode && mode2 == TFmode)
13990 || (mode1 == TFmode && mode2 == IFmode)))
13991 return N_("__ibm128 and long double cannot be used in the same "
13992 "expression");
13993
13994 if (!TARGET_IEEEQUAD
13995 && ((mode1 == KFmode && mode2 == TFmode)
13996 || (mode1 == TFmode && mode2 == KFmode)))
13997 return N_("__float128 and long double cannot be used in the same "
13998 "expression");
13999 }
14000
14001 return NULL;
14002 }
14003
14004 \f
14005 /* Expand floating point conversion to/from __float128 and __ibm128. */
14006
14007 void
14008 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14009 {
14010 machine_mode dest_mode = GET_MODE (dest);
14011 machine_mode src_mode = GET_MODE (src);
14012 convert_optab cvt = unknown_optab;
14013 bool do_move = false;
14014 rtx libfunc = NULL_RTX;
14015 rtx dest2;
14016 typedef rtx (*rtx_2func_t) (rtx, rtx);
14017 rtx_2func_t hw_convert = (rtx_2func_t)0;
14018 size_t kf_or_tf;
14019
14020 struct hw_conv_t {
14021 rtx_2func_t from_df;
14022 rtx_2func_t from_sf;
14023 rtx_2func_t from_si_sign;
14024 rtx_2func_t from_si_uns;
14025 rtx_2func_t from_di_sign;
14026 rtx_2func_t from_di_uns;
14027 rtx_2func_t to_df;
14028 rtx_2func_t to_sf;
14029 rtx_2func_t to_si_sign;
14030 rtx_2func_t to_si_uns;
14031 rtx_2func_t to_di_sign;
14032 rtx_2func_t to_di_uns;
14033 } hw_conversions[2] = {
14034 /* conversions to/from KFmode */
14035 {
14036 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14037 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14038 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14039 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14040 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14041 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14042 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14043 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14044 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14045 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14046 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14047 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14048 },
14049
14050 /* conversions to/from TFmode */
14051 {
14052 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14053 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14054 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14055 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14056 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14057 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14058 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14059 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14060 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14061 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14062 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14063 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14064 },
14065 };
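 /* Row 0 of hw_conversions handles KFmode, row 1 handles TFmode (when
 long double is IEEE 128-bit); kf_or_tf below selects the row. */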
14066
14067 if (dest_mode == src_mode)
14068 gcc_unreachable ();
14069
14070 /* Eliminate memory operations. */
14071 if (MEM_P (src))
14072 src = force_reg (src_mode, src);
14073
14074 if (MEM_P (dest))
14075 {
14076 rtx tmp = gen_reg_rtx (dest_mode);
14077 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14078 rs6000_emit_move (dest, tmp, dest_mode);
14079 return;
14080 }
14081
14082 /* Convert to IEEE 128-bit floating point. */
14083 if (FLOAT128_IEEE_P (dest_mode))
14084 {
14085 if (dest_mode == KFmode)
14086 kf_or_tf = 0;
14087 else if (dest_mode == TFmode)
14088 kf_or_tf = 1;
14089 else
14090 gcc_unreachable ();
14091
14092 switch (src_mode)
14093 {
14094 case E_DFmode:
14095 cvt = sext_optab;
14096 hw_convert = hw_conversions[kf_or_tf].from_df;
14097 break;
14098
14099 case E_SFmode:
14100 cvt = sext_optab;
14101 hw_convert = hw_conversions[kf_or_tf].from_sf;
14102 break;
14103
14104 case E_KFmode:
14105 case E_IFmode:
14106 case E_TFmode:
14107 if (FLOAT128_IBM_P (src_mode))
14108 cvt = sext_optab;
14109 else
14110 do_move = true;
14111 break;
14112
14113 case E_SImode:
14114 if (unsigned_p)
14115 {
14116 cvt = ufloat_optab;
14117 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14118 }
14119 else
14120 {
14121 cvt = sfloat_optab;
14122 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14123 }
14124 break;
14125
14126 case E_DImode:
14127 if (unsigned_p)
14128 {
14129 cvt = ufloat_optab;
14130 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14131 }
14132 else
14133 {
14134 cvt = sfloat_optab;
14135 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14136 }
14137 break;
14138
14139 default:
14140 gcc_unreachable ();
14141 }
14142 }
14143
14144 /* Convert from IEEE 128-bit floating point. */
14145 else if (FLOAT128_IEEE_P (src_mode))
14146 {
14147 if (src_mode == KFmode)
14148 kf_or_tf = 0;
14149 else if (src_mode == TFmode)
14150 kf_or_tf = 1;
14151 else
14152 gcc_unreachable ();
14153
14154 switch (dest_mode)
14155 {
14156 case E_DFmode:
14157 cvt = trunc_optab;
14158 hw_convert = hw_conversions[kf_or_tf].to_df;
14159 break;
14160
14161 case E_SFmode:
14162 cvt = trunc_optab;
14163 hw_convert = hw_conversions[kf_or_tf].to_sf;
14164 break;
14165
14166 case E_KFmode:
14167 case E_IFmode:
14168 case E_TFmode:
14169 if (FLOAT128_IBM_P (dest_mode))
14170 cvt = trunc_optab;
14171 else
14172 do_move = true;
14173 break;
14174
14175 case E_SImode:
14176 if (unsigned_p)
14177 {
14178 cvt = ufix_optab;
14179 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14180 }
14181 else
14182 {
14183 cvt = sfix_optab;
14184 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14185 }
14186 break;
14187
14188 case E_DImode:
14189 if (unsigned_p)
14190 {
14191 cvt = ufix_optab;
14192 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14193 }
14194 else
14195 {
14196 cvt = sfix_optab;
14197 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14198 }
14199 break;
14200
14201 default:
14202 gcc_unreachable ();
14203 }
14204 }
14205
14206 /* Both IBM format. */
14207 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14208 do_move = true;
14209
14210 else
14211 gcc_unreachable ();
14212
14213 /* Handle conversion between TFmode/KFmode/IFmode. */
14214 if (do_move)
14215 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14216
14217 /* Handle conversion if we have hardware support. */
14218 else if (TARGET_FLOAT128_HW && hw_convert)
14219 emit_insn ((hw_convert) (dest, src));
14220
14221 /* Call an external function to do the conversion. */
14222 else if (cvt != unknown_optab)
14223 {
14224 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14225 gcc_assert (libfunc != NULL_RTX);
14226
14227 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14228 src, src_mode);
14229
14230 gcc_assert (dest2 != NULL_RTX);
14231 if (!rtx_equal_p (dest, dest2))
14232 emit_move_insn (dest, dest2);
14233 }
14234
14235 else
14236 gcc_unreachable ();
14237
14238 return;
14239 }
14240
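/* Illustrative sketch (not from the original sources): converting DFmode to
   KFmode with ISA 3.0 hardware support goes through the hw_conversions
   entry gen_extenddfkf2_hw above; without hardware support, the sext_optab
   libcall registered for the KFmode/DFmode pair (e.g. __extenddfkf2 on
   VSX Linux targets) is emitted instead.  */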
14241 \f
14242 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14243 can be used as that dest register. Return the dest register. */
14244
14245 rtx
14246 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14247 {
14248 if (op2 == const0_rtx)
14249 return op1;
14250
14251 if (GET_CODE (scratch) == SCRATCH)
14252 scratch = gen_reg_rtx (mode);
14253
14254 if (logical_operand (op2, mode))
14255 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14256 else
14257 emit_insn (gen_rtx_SET (scratch,
14258 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14259
14260 return scratch;
14261 }
14262
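/* For example (illustrative, not from the original sources), comparing r3
   against a logical constant such as 0x1234 yields

	xori scratch,r3,0x1234

   so that SCRATCH is zero exactly when the operands are equal; for other
   constants the negated value is added instead.  */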
14263 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14264 requires this. The result is mode MODE. */
14265 rtx
14266 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14267 {
14268 rtx cond[2];
14269 int n = 0;
14270 if (code == LTGT || code == LE || code == UNLT)
14271 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14272 if (code == LTGT || code == GE || code == UNGT)
14273 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14274 if (code == LE || code == GE || code == UNEQ)
14275 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14276 if (code == UNLT || code == UNGT || code == UNEQ)
14277 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14278
14279 gcc_assert (n == 2);
14280
14281 rtx cc = gen_reg_rtx (CCEQmode);
14282 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14283 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14284
14285 return cc;
14286 }
14287
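/* Illustrative sketch (not from the original sources): for CODE == LE the
   two conditions built above are LT and EQ, so the cror computes
   CR-EQ = CR-LT | CR-EQ, i.e. roughly

	cror 2,0,2

   when the comparison result lives in cr0 (bit 0 is LT, bit 2 is EQ).  */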
14288 void
14289 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14290 {
14291 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14292 rtx_code cond_code = GET_CODE (condition_rtx);
14293
14294 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14295 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14296 ;
14297 else if (cond_code == NE
14298 || cond_code == GE || cond_code == LE
14299 || cond_code == GEU || cond_code == LEU
14300 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14301 {
14302 rtx not_result = gen_reg_rtx (CCEQmode);
14303 rtx not_op, rev_cond_rtx;
14304 machine_mode cc_mode;
14305
14306 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14307
14308 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14309 SImode, XEXP (condition_rtx, 0), const0_rtx);
14310 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14311 emit_insn (gen_rtx_SET (not_result, not_op));
14312 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14313 }
14314
14315 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14316 if (op_mode == VOIDmode)
14317 op_mode = GET_MODE (XEXP (operands[1], 1));
14318
14319 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14320 {
14321 PUT_MODE (condition_rtx, DImode);
14322 convert_move (operands[0], condition_rtx, 0);
14323 }
14324 else
14325 {
14326 PUT_MODE (condition_rtx, SImode);
14327 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14328 }
14329 }
14330
14331 /* Emit a conditional branch to the location in operands[3], testing the
14331 comparison given by operands[0]. */
14332
14333 void
14334 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14335 {
14336 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14337 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14338 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14339 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14340 }
14341
14342 /* Return the string to output a conditional branch to LABEL, which is
14343 the operand template of the label, or NULL if the branch is really a
14344 conditional return.
14345
14346 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14347 condition code register and its mode specifies what kind of
14348 comparison we made.
14349
14350 REVERSED is nonzero if we should reverse the sense of the comparison.
14351
14352 INSN is the insn. */
14353
14354 char *
14355 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14356 {
14357 static char string[64];
14358 enum rtx_code code = GET_CODE (op);
14359 rtx cc_reg = XEXP (op, 0);
14360 machine_mode mode = GET_MODE (cc_reg);
14361 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14362 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14363 int really_reversed = reversed ^ need_longbranch;
14364 char *s = string;
14365 const char *ccode;
14366 const char *pred;
14367 rtx note;
14368
14369 validate_condition_mode (code, mode);
14370
14371 /* Work out which way this really branches. We could use
14372 reverse_condition_maybe_unordered here always but this
14373 makes the resulting assembler clearer. */
14374 if (really_reversed)
14375 {
14376 /* Reversal of FP compares takes care -- an ordered compare
14377 becomes an unordered compare and vice versa. */
14378 if (mode == CCFPmode)
14379 code = reverse_condition_maybe_unordered (code);
14380 else
14381 code = reverse_condition (code);
14382 }
14383
14384 switch (code)
14385 {
14386 /* Not all of these are actually distinct opcodes, but
14387 we distinguish them for clarity of the resulting assembler. */
14388 case NE: case LTGT:
14389 ccode = "ne"; break;
14390 case EQ: case UNEQ:
14391 ccode = "eq"; break;
14392 case GE: case GEU:
14393 ccode = "ge"; break;
14394 case GT: case GTU: case UNGT:
14395 ccode = "gt"; break;
14396 case LE: case LEU:
14397 ccode = "le"; break;
14398 case LT: case LTU: case UNLT:
14399 ccode = "lt"; break;
14400 case UNORDERED: ccode = "un"; break;
14401 case ORDERED: ccode = "nu"; break;
14402 case UNGE: ccode = "nl"; break;
14403 case UNLE: ccode = "ng"; break;
14404 default:
14405 gcc_unreachable ();
14406 }
14407
14408 /* Maybe we have a guess as to how likely the branch is. */
14409 pred = "";
14410 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
14411 if (note != NULL_RTX)
14412 {
14413 /* PROB is the difference from 50%. */
14414 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
14415 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
14416
14417 /* Only hint for highly probable/improbable branches on newer cpus when
14418 we have real profile data, as static prediction overrides processor
14419 dynamic prediction. For older cpus we may as well always hint, but
14420 assume not taken for branches that are very close to 50% as a
14421 mispredicted taken branch is more expensive than a
14422 mispredicted not-taken branch. */
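	/* For example (REG_BR_PROB_BASE is 10000), the test below fires when
	   the branch probability is outside roughly [2%, 98%], and the
	   "+"/"-" hint is then only emitted once the probability is more
	   than 5% away from 50%.  */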
14423 if (rs6000_always_hint
14424 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
14425 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
14426 && br_prob_note_reliable_p (note)))
14427 {
14428 if (abs (prob) > REG_BR_PROB_BASE / 20
14429 && ((prob > 0) ^ need_longbranch))
14430 pred = "+";
14431 else
14432 pred = "-";
14433 }
14434 }
14435
14436 if (label == NULL)
14437 s += sprintf (s, "b%slr%s ", ccode, pred);
14438 else
14439 s += sprintf (s, "b%s%s ", ccode, pred);
14440
14441 /* We need to escape any '%' characters in the reg_names string.
14442 Assume they'd only be the first character.... */
14443 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
14444 *s++ = '%';
14445 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
14446
14447 if (label != NULL)
14448 {
14449 /* If the branch distance was too far, we may have to use an
14450 unconditional branch to go the distance. */
14451 if (need_longbranch)
14452 s += sprintf (s, ",$+8\n\tb %s", label);
14453 else
14454 s += sprintf (s, ",%s", label);
14455 }
14456
14457 return string;
14458 }
14459
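/* Illustrative sketch (not from the original sources, .L42 is a hypothetical
   label): an EQ branch on cr1 predicted not taken renders as

	beq- 1,.L42

   and the same branch to a label beyond conditional-branch range is emitted
   with the condition reversed around an unconditional branch:

	bne 1,$+8
	b .L42  */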
14460 /* Return insn for VSX or Altivec comparisons. */
14461
14462 static rtx
14463 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14464 {
14465 rtx mask;
14466 machine_mode mode = GET_MODE (op0);
14467
14468 switch (code)
14469 {
14470 default:
14471 break;
14472
14473 case GE:
14474 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14475 return NULL_RTX;
14476 /* FALLTHRU */
14477
14478 case EQ:
14479 case GT:
14480 case GTU:
14481 case ORDERED:
14482 case UNORDERED:
14483 case UNEQ:
14484 case LTGT:
14485 mask = gen_reg_rtx (mode);
14486 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14487 return mask;
14488 }
14489
14490 return NULL_RTX;
14491 }
14492
14493 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14494 DMODE is the expected destination mode. This is a recursive function. */
14495
14496 static rtx
14497 rs6000_emit_vector_compare (enum rtx_code rcode,
14498 rtx op0, rtx op1,
14499 machine_mode dmode)
14500 {
14501 rtx mask;
14502 bool swap_operands = false;
14503 bool try_again = false;
14504
14505 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
14506 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
14507
14508 /* See if the comparison works as is. */
14509 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14510 if (mask)
14511 return mask;
14512
14513 switch (rcode)
14514 {
14515 case LT:
14516 rcode = GT;
14517 swap_operands = true;
14518 try_again = true;
14519 break;
14520 case LTU:
14521 rcode = GTU;
14522 swap_operands = true;
14523 try_again = true;
14524 break;
14525 case NE:
14526 case UNLE:
14527 case UNLT:
14528 case UNGE:
14529 case UNGT:
14530 /* Invert condition and try again.
14531 e.g., A != B becomes ~(A==B). */
14532 {
14533 enum rtx_code rev_code;
14534 enum insn_code nor_code;
14535 rtx mask2;
14536
14537 rev_code = reverse_condition_maybe_unordered (rcode);
14538 if (rev_code == UNKNOWN)
14539 return NULL_RTX;
14540
14541 nor_code = optab_handler (one_cmpl_optab, dmode);
14542 if (nor_code == CODE_FOR_nothing)
14543 return NULL_RTX;
14544
14545 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
14546 if (!mask2)
14547 return NULL_RTX;
14548
14549 mask = gen_reg_rtx (dmode);
14550 emit_insn (GEN_FCN (nor_code) (mask, mask2));
14551 return mask;
14552 }
14553 break;
14554 case GE:
14555 case GEU:
14556 case LE:
14557 case LEU:
14558 /* Try GT/GTU/LT/LTU OR EQ */
14559 {
14560 rtx c_rtx, eq_rtx;
14561 enum insn_code ior_code;
14562 enum rtx_code new_code;
14563
14564 switch (rcode)
14565 {
14566 case GE:
14567 new_code = GT;
14568 break;
14569
14570 case GEU:
14571 new_code = GTU;
14572 break;
14573
14574 case LE:
14575 new_code = LT;
14576 break;
14577
14578 case LEU:
14579 new_code = LTU;
14580 break;
14581
14582 default:
14583 gcc_unreachable ();
14584 }
14585
14586 ior_code = optab_handler (ior_optab, dmode);
14587 if (ior_code == CODE_FOR_nothing)
14588 return NULL_RTX;
14589
14590 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
14591 if (!c_rtx)
14592 return NULL_RTX;
14593
14594 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
14595 if (!eq_rtx)
14596 return NULL_RTX;
14597
14598 mask = gen_reg_rtx (dmode);
14599 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
14600 return mask;
14601 }
14602 break;
14603 default:
14604 return NULL_RTX;
14605 }
14606
14607 if (try_again)
14608 {
14609 if (swap_operands)
14610 std::swap (op0, op1);
14611
14612 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14613 if (mask)
14614 return mask;
14615 }
14616
14617 /* You only get two chances. */
14618 return NULL_RTX;
14619 }
14620
14621 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14622 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14623 operands for the relation operation COND. */
14624
14625 int
14626 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
14627 rtx cond, rtx cc_op0, rtx cc_op1)
14628 {
14629 machine_mode dest_mode = GET_MODE (dest);
14630 machine_mode mask_mode = GET_MODE (cc_op0);
14631 enum rtx_code rcode = GET_CODE (cond);
14632 machine_mode cc_mode = CCmode;
14633 rtx mask;
14634 rtx cond2;
14635 bool invert_move = false;
14636
14637 if (VECTOR_UNIT_NONE_P (dest_mode))
14638 return 0;
14639
14640 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
14641 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
14642
14643 switch (rcode)
14644 {
14645 /* Swap operands if we can, and fall back to doing the operation as
14646 specified, and doing a NOR to invert the test. */
14647 case NE:
14648 case UNLE:
14649 case UNLT:
14650 case UNGE:
14651 case UNGT:
14652 /* Invert condition and try again.
14653 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14654 invert_move = true;
14655 rcode = reverse_condition_maybe_unordered (rcode);
14656 if (rcode == UNKNOWN)
14657 return 0;
14658 break;
14659
14660 case GE:
14661 case LE:
14662 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
14663 {
14664 /* Invert condition to avoid compound test. */
14665 invert_move = true;
14666 rcode = reverse_condition (rcode);
14667 }
14668 break;
14669
14670 case GTU:
14671 case GEU:
14672 case LTU:
14673 case LEU:
14674 /* Mark unsigned tests with CCUNSmode. */
14675 cc_mode = CCUNSmode;
14676
14677 /* Invert condition to avoid compound test if necessary. */
14678 if (rcode == GEU || rcode == LEU)
14679 {
14680 invert_move = true;
14681 rcode = reverse_condition (rcode);
14682 }
14683 break;
14684
14685 default:
14686 break;
14687 }
14688
14689 /* Get the vector mask for the given relational operations. */
14690 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
14691
14692 if (!mask)
14693 return 0;
14694
14695 if (invert_move)
14696 std::swap (op_true, op_false);
14697
14698 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
14699 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
14700 && (GET_CODE (op_true) == CONST_VECTOR
14701 || GET_CODE (op_false) == CONST_VECTOR))
14702 {
14703 rtx constant_0 = CONST0_RTX (dest_mode);
14704 rtx constant_m1 = CONSTM1_RTX (dest_mode);
14705
14706 if (op_true == constant_m1 && op_false == constant_0)
14707 {
14708 emit_move_insn (dest, mask);
14709 return 1;
14710 }
14711
14712 else if (op_true == constant_0 && op_false == constant_m1)
14713 {
14714 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
14715 return 1;
14716 }
14717
14718 /* If we can't use the vector comparison directly, perhaps we can use
14719 the mask for the true or false fields, instead of loading up a
14720 constant. */
14721 if (op_true == constant_m1)
14722 op_true = mask;
14723
14724 if (op_false == constant_0)
14725 op_false = mask;
14726 }
14727
14728 if (!REG_P (op_true) && !SUBREG_P (op_true))
14729 op_true = force_reg (dest_mode, op_true);
14730
14731 if (!REG_P (op_false) && !SUBREG_P (op_false))
14732 op_false = force_reg (dest_mode, op_false);
14733
14734 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
14735 CONST0_RTX (dest_mode));
14736 emit_insn (gen_rtx_SET (dest,
14737 gen_rtx_IF_THEN_ELSE (dest_mode,
14738 cond2,
14739 op_true,
14740 op_false)));
14741 return 1;
14742 }
14743
14744 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
14745 for SF/DF scalars. Move TRUE_COND to DEST if the comparison OP on its
14746 operands is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
14747 hardware has no such operation. */
14748
14749 static int
14750 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14751 {
14752 enum rtx_code code = GET_CODE (op);
14753 rtx op0 = XEXP (op, 0);
14754 rtx op1 = XEXP (op, 1);
14755 machine_mode compare_mode = GET_MODE (op0);
14756 machine_mode result_mode = GET_MODE (dest);
14757 bool max_p = false;
14758
14759 if (result_mode != compare_mode)
14760 return 0;
14761
14762 if (code == GE || code == GT)
14763 max_p = true;
14764 else if (code == LE || code == LT)
14765 max_p = false;
14766 else
14767 return 0;
14768
14769 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14770 ;
14771
14772 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
14773 max_p = !max_p;
14774
14775 else
14776 return 0;
14777
14778 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14779 return 1;
14780 }
14781
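/* For instance (illustrative, not from the original sources), with
   -mcpu=power9 a DFmode expression of the form

	d = (a >= b) ? a : b;

   satisfies the checks above and is emitted as a single xsmaxcdp instead of
   a compare-and-branch or fsel sequence.  */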
14782 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14783 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if the
14784 comparison OP on its operands is nonzero/true, FALSE_COND if it is
14785 zero/false. Return 0 if the hardware has no such operation. */
14786
14787 static int
14788 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14789 {
14790 enum rtx_code code = GET_CODE (op);
14791 rtx op0 = XEXP (op, 0);
14792 rtx op1 = XEXP (op, 1);
14793 machine_mode result_mode = GET_MODE (dest);
14794 rtx compare_rtx;
14795 rtx cmove_rtx;
14796 rtx clobber_rtx;
14797
14798 if (!can_create_pseudo_p ())
14799 return 0;
14800
14801 switch (code)
14802 {
14803 case EQ:
14804 case GE:
14805 case GT:
14806 break;
14807
14808 case NE:
14809 case LT:
14810 case LE:
14811 code = swap_condition (code);
14812 std::swap (op0, op1);
14813 break;
14814
14815 default:
14816 return 0;
14817 }
14818
14819 /* Generate: [(parallel [(set (dest)
14820 (if_then_else (op (cmp1) (cmp2))
14821 (true)
14822 (false)))
14823 (clobber (scratch))])]. */
14824
14825 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
14826 cmove_rtx = gen_rtx_SET (dest,
14827 gen_rtx_IF_THEN_ELSE (result_mode,
14828 compare_rtx,
14829 true_cond,
14830 false_cond));
14831
14832 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
14833 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14834 gen_rtvec (2, cmove_rtx, clobber_rtx)));
14835
14836 return 1;
14837 }
14838
14839 /* Emit a conditional move: move TRUE_COND to DEST if the comparison
14840 OP on its operands is nonzero/true, FALSE_COND if it
14841 is zero/false. Return 0 if the hardware has no such operation. */
14842
14843 int
14844 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14845 {
14846 enum rtx_code code = GET_CODE (op);
14847 rtx op0 = XEXP (op, 0);
14848 rtx op1 = XEXP (op, 1);
14849 machine_mode compare_mode = GET_MODE (op0);
14850 machine_mode result_mode = GET_MODE (dest);
14851 rtx temp;
14852 bool is_against_zero;
14853
14854 /* These modes should always match. */
14855 if (GET_MODE (op1) != compare_mode
14856 /* In the isel case however, we can use a compare immediate, so
14857 op1 may be a small constant. */
14858 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
14859 return 0;
14860 if (GET_MODE (true_cond) != result_mode)
14861 return 0;
14862 if (GET_MODE (false_cond) != result_mode)
14863 return 0;
14864
14865 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14866 if (TARGET_P9_MINMAX
14867 && (compare_mode == SFmode || compare_mode == DFmode)
14868 && (result_mode == SFmode || result_mode == DFmode))
14869 {
14870 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
14871 return 1;
14872
14873 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
14874 return 1;
14875 }
14876
14877 /* Don't allow using floating point comparisons for integer results for
14878 now. */
14879 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
14880 return 0;
14881
14882 /* First, work out if the hardware can do this at all, or
14883 if it's too slow.... */
14884 if (!FLOAT_MODE_P (compare_mode))
14885 {
14886 if (TARGET_ISEL)
14887 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
14888 return 0;
14889 }
14890
14891 is_against_zero = op1 == CONST0_RTX (compare_mode);
14892
14893 /* A floating-point subtract might overflow, underflow, or produce
14894 an inexact result, thus changing the floating-point flags, so it
14895 can't be generated if we care about that. It's safe if one side
14896 of the construct is zero, since then no subtract will be
14897 generated. */
14898 if (SCALAR_FLOAT_MODE_P (compare_mode)
14899 && flag_trapping_math && ! is_against_zero)
14900 return 0;
14901
14902 /* Eliminate half of the comparisons by switching operands, this
14903 makes the remaining code simpler. */
14904 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
14905 || code == LTGT || code == LT || code == UNLE)
14906 {
14907 code = reverse_condition_maybe_unordered (code);
14908 temp = true_cond;
14909 true_cond = false_cond;
14910 false_cond = temp;
14911 }
14912
14913 /* UNEQ and LTGT take four instructions for a comparison with zero,
14914 it'll probably be faster to use a branch here too. */
14915 if (code == UNEQ && HONOR_NANS (compare_mode))
14916 return 0;
14917
14918 /* We're going to try to implement comparisons by performing
14919 a subtract, then comparing against zero. Unfortunately,
14920 Inf - Inf is NaN which is not zero, and so if we don't
14921 know that the operand is finite and the comparison
14922 would treat EQ different to UNORDERED, we can't do it. */
14923 if (HONOR_INFINITIES (compare_mode)
14924 && code != GT && code != UNGE
14925 && (!CONST_DOUBLE_P (op1)
14926 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
14927 /* Constructs of the form (a OP b ? a : b) are safe. */
14928 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
14929 || (! rtx_equal_p (op0, true_cond)
14930 && ! rtx_equal_p (op1, true_cond))))
14931 return 0;
14932
14933 /* At this point we know we can use fsel. */
14934
14935 /* Don't allow compare_mode other than SFmode or DFmode, for others there
14936 is no fsel instruction. */
14937 if (compare_mode != SFmode && compare_mode != DFmode)
14938 return 0;
14939
14940 /* Reduce the comparison to a comparison against zero. */
14941 if (! is_against_zero)
14942 {
14943 temp = gen_reg_rtx (compare_mode);
14944 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
14945 op0 = temp;
14946 op1 = CONST0_RTX (compare_mode);
14947 }
14948
14949 /* If we don't care about NaNs we can reduce some of the comparisons
14950 down to faster ones. */
14951 if (! HONOR_NANS (compare_mode))
14952 switch (code)
14953 {
14954 case GT:
14955 code = LE;
14956 temp = true_cond;
14957 true_cond = false_cond;
14958 false_cond = temp;
14959 break;
14960 case UNGE:
14961 code = GE;
14962 break;
14963 case UNEQ:
14964 code = EQ;
14965 break;
14966 default:
14967 break;
14968 }
14969
14970 /* Now, reduce everything down to a GE. */
14971 switch (code)
14972 {
14973 case GE:
14974 break;
14975
14976 case LE:
14977 temp = gen_reg_rtx (compare_mode);
14978 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
14979 op0 = temp;
14980 break;
14981
14982 case ORDERED:
14983 temp = gen_reg_rtx (compare_mode);
14984 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
14985 op0 = temp;
14986 break;
14987
14988 case EQ:
14989 temp = gen_reg_rtx (compare_mode);
14990 emit_insn (gen_rtx_SET (temp,
14991 gen_rtx_NEG (compare_mode,
14992 gen_rtx_ABS (compare_mode, op0))));
14993 op0 = temp;
14994 break;
14995
14996 case UNGE:
14997 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
14998 temp = gen_reg_rtx (result_mode);
14999 emit_insn (gen_rtx_SET (temp,
15000 gen_rtx_IF_THEN_ELSE (result_mode,
15001 gen_rtx_GE (VOIDmode,
15002 op0, op1),
15003 true_cond, false_cond)));
15004 false_cond = true_cond;
15005 true_cond = temp;
15006
15007 temp = gen_reg_rtx (compare_mode);
15008 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15009 op0 = temp;
15010 break;
15011
15012 case GT:
15013 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15014 temp = gen_reg_rtx (result_mode);
15015 emit_insn (gen_rtx_SET (temp,
15016 gen_rtx_IF_THEN_ELSE (result_mode,
15017 gen_rtx_GE (VOIDmode,
15018 op0, op1),
15019 true_cond, false_cond)));
15020 true_cond = false_cond;
15021 false_cond = temp;
15022
15023 temp = gen_reg_rtx (compare_mode);
15024 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15025 op0 = temp;
15026 break;
15027
15028 default:
15029 gcc_unreachable ();
15030 }
15031
15032 emit_insn (gen_rtx_SET (dest,
15033 gen_rtx_IF_THEN_ELSE (result_mode,
15034 gen_rtx_GE (VOIDmode,
15035 op0, op1),
15036 true_cond, false_cond)));
15037 return 1;
15038 }
15039
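/* Illustrative sketch (not from the original sources): once the comparison
   has been reduced to a GE against zero, the final if_then_else above maps
   onto the fsel instruction, e.g. for

	d = (a >= 0.0) ? x : y;

   roughly

	fsel d,a,x,y

   since fsel selects its second source operand when the first is >= 0.  */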
15040 /* Same as above, but for ints (isel). */
15041
15042 int
15043 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15044 {
15045 rtx condition_rtx, cr;
15046 machine_mode mode = GET_MODE (dest);
15047 enum rtx_code cond_code;
15048 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15049 bool signedp;
15050
15051 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15052 return 0;
15053
15054 /* We still have to do the compare, because isel doesn't do a
15055 compare, it just looks at the CRx bits set by a previous compare
15056 instruction. */
15057 condition_rtx = rs6000_generate_compare (op, mode);
15058 cond_code = GET_CODE (condition_rtx);
15059 cr = XEXP (condition_rtx, 0);
15060 signedp = GET_MODE (cr) == CCmode;
15061
15062 isel_func = (mode == SImode
15063 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15064 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15065
15066 switch (cond_code)
15067 {
15068 case LT: case GT: case LTU: case GTU: case EQ:
15069 /* isel handles these directly. */
15070 break;
15071
15072 default:
15073 /* We need to swap the sense of the comparison. */
15074 {
15075 std::swap (false_cond, true_cond);
15076 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15077 }
15078 break;
15079 }
15080
15081 false_cond = force_reg (mode, false_cond);
15082 if (true_cond != const0_rtx)
15083 true_cond = force_reg (mode, true_cond);
15084
15085 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15086
15087 return 1;
15088 }
15089
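/* Illustrative sketch (not from the original sources): for SImode

	r = (a < b) ? c : d;

   this emits a compare followed by an integer select, roughly

	cmpw cr0,a,b
	isel r,c,d,0

   where CR bit 0 is the LT bit of cr0.  */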
15090 void
15091 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15092 {
15093 machine_mode mode = GET_MODE (op0);
15094 enum rtx_code c;
15095 rtx target;
15096
15097 /* VSX/altivec have direct min/max insns. */
15098 if ((code == SMAX || code == SMIN)
15099 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15100 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15101 {
15102 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15103 return;
15104 }
15105
15106 if (code == SMAX || code == SMIN)
15107 c = GE;
15108 else
15109 c = GEU;
15110
15111 if (code == SMAX || code == UMAX)
15112 target = emit_conditional_move (dest, c, op0, op1, mode,
15113 op0, op1, mode, 0);
15114 else
15115 target = emit_conditional_move (dest, c, op0, op1, mode,
15116 op1, op0, mode, 0);
15117 gcc_assert (target);
15118 if (target != dest)
15119 emit_move_insn (dest, target);
15120 }
15121
15122 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15123 COND is true. Mark the jump as unlikely to be taken. */
15124
15125 static void
15126 emit_unlikely_jump (rtx cond, rtx label)
15127 {
15128 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15129 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15130 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15131 }
15132
15133 /* A subroutine of the atomic operation splitters. Emit a load-locked
15134 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15135 the zero_extend operation. */
15136
15137 static void
15138 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15139 {
15140 rtx (*fn) (rtx, rtx) = NULL;
15141
15142 switch (mode)
15143 {
15144 case E_QImode:
15145 fn = gen_load_lockedqi;
15146 break;
15147 case E_HImode:
15148 fn = gen_load_lockedhi;
15149 break;
15150 case E_SImode:
15151 if (GET_MODE (mem) == QImode)
15152 fn = gen_load_lockedqi_si;
15153 else if (GET_MODE (mem) == HImode)
15154 fn = gen_load_lockedhi_si;
15155 else
15156 fn = gen_load_lockedsi;
15157 break;
15158 case E_DImode:
15159 fn = gen_load_lockeddi;
15160 break;
15161 case E_TImode:
15162 fn = gen_load_lockedti;
15163 break;
15164 default:
15165 gcc_unreachable ();
15166 }
15167 emit_insn (fn (reg, mem));
15168 }
15169
15170 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15171 instruction in MODE. */
15172
15173 static void
15174 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15175 {
15176 rtx (*fn) (rtx, rtx, rtx) = NULL;
15177
15178 switch (mode)
15179 {
15180 case E_QImode:
15181 fn = gen_store_conditionalqi;
15182 break;
15183 case E_HImode:
15184 fn = gen_store_conditionalhi;
15185 break;
15186 case E_SImode:
15187 fn = gen_store_conditionalsi;
15188 break;
15189 case E_DImode:
15190 fn = gen_store_conditionaldi;
15191 break;
15192 case E_TImode:
15193 fn = gen_store_conditionalti;
15194 break;
15195 default:
15196 gcc_unreachable ();
15197 }
15198
15199 /* Emit sync before stwcx. to address PPC405 Erratum 77. */
15200 if (PPC405_ERRATUM77)
15201 emit_insn (gen_hwsync ());
15202
15203 emit_insn (fn (res, mem, val));
15204 }
15205
15206 /* Expand barriers before and after a load_locked/store_cond sequence. */
15207
15208 static rtx
15209 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15210 {
15211 rtx addr = XEXP (mem, 0);
15212
15213 if (!legitimate_indirect_address_p (addr, reload_completed)
15214 && !legitimate_indexed_address_p (addr, reload_completed))
15215 {
15216 addr = force_reg (Pmode, addr);
15217 mem = replace_equiv_address_nv (mem, addr);
15218 }
15219
15220 switch (model)
15221 {
15222 case MEMMODEL_RELAXED:
15223 case MEMMODEL_CONSUME:
15224 case MEMMODEL_ACQUIRE:
15225 break;
15226 case MEMMODEL_RELEASE:
15227 case MEMMODEL_ACQ_REL:
15228 emit_insn (gen_lwsync ());
15229 break;
15230 case MEMMODEL_SEQ_CST:
15231 emit_insn (gen_hwsync ());
15232 break;
15233 default:
15234 gcc_unreachable ();
15235 }
15236 return mem;
15237 }
15238
15239 static void
15240 rs6000_post_atomic_barrier (enum memmodel model)
15241 {
15242 switch (model)
15243 {
15244 case MEMMODEL_RELAXED:
15245 case MEMMODEL_CONSUME:
15246 case MEMMODEL_RELEASE:
15247 break;
15248 case MEMMODEL_ACQUIRE:
15249 case MEMMODEL_ACQ_REL:
15250 case MEMMODEL_SEQ_CST:
15251 emit_insn (gen_isync ());
15252 break;
15253 default:
15254 gcc_unreachable ();
15255 }
15256 }
15257
15258 /* A subroutine of the various atomic expanders. For sub-word operations,
15259 we must adjust things to operate on SImode. Given the original MEM,
15260 return a new aligned memory. Also build and return the quantities by
15261 which to shift and mask. */
15262
15263 static rtx
15264 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15265 {
15266 rtx addr, align, shift, mask, mem;
15267 HOST_WIDE_INT shift_mask;
15268 machine_mode mode = GET_MODE (orig_mem);
15269
15270 /* For smaller modes, we have to implement this via SImode. */
15271 shift_mask = (mode == QImode ? 0x18 : 0x10);
15272
15273 addr = XEXP (orig_mem, 0);
15274 addr = force_reg (GET_MODE (addr), addr);
15275
15276 /* Aligned memory containing subword. Generate a new memory. We
15277 do not want any of the existing MEM_ATTR data, as we're now
15278 accessing memory outside the original object. */
15279 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15280 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15281 mem = gen_rtx_MEM (SImode, align);
15282 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15283 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15284 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15285
15286 /* Shift amount for subword relative to aligned word. */
15287 shift = gen_reg_rtx (SImode);
15288 addr = gen_lowpart (SImode, addr);
15289 rtx tmp = gen_reg_rtx (SImode);
15290 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15291 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15292 if (BYTES_BIG_ENDIAN)
15293 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15294 shift, 1, OPTAB_LIB_WIDEN);
15295 *pshift = shift;
15296
15297 /* Mask for insertion. */
15298 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15299 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15300 *pmask = mask;
15301
15302 return mem;
15303 }
15304
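/* Worked example (illustrative, not from the original sources): for a HImode
   access at address 0x1002, the aligned word is at 0x1002 & -4 = 0x1000, the
   little-endian shift is (0x1002 << 3) & 0x10 = 16, big-endian XORs that to
   0, and the insertion mask is 0xffff shifted left by that amount.  */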
15305 /* A subroutine of the various atomic expanders. For sub-word operands,
15306 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15307
15308 static rtx
15309 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15310 {
15311 rtx x;
15312
15313 x = gen_reg_rtx (SImode);
15314 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15315 gen_rtx_NOT (SImode, mask),
15316 oldval)));
15317
15318 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15319
15320 return x;
15321 }
15322
15323 /* A subroutine of the various atomic expanders. For sub-word operands,
15324 extract WIDE to NARROW via SHIFT. */
15325
15326 static void
15327 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15328 {
15329 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15330 wide, 1, OPTAB_LIB_WIDEN);
15331 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15332 }
15333
15334 /* Expand an atomic compare and swap operation. */
15335
15336 void
15337 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15338 {
15339 rtx boolval, retval, mem, oldval, newval, cond;
15340 rtx label1, label2, x, mask, shift;
15341 machine_mode mode, orig_mode;
15342 enum memmodel mod_s, mod_f;
15343 bool is_weak;
15344
15345 boolval = operands[0];
15346 retval = operands[1];
15347 mem = operands[2];
15348 oldval = operands[3];
15349 newval = operands[4];
15350 is_weak = (INTVAL (operands[5]) != 0);
15351 mod_s = memmodel_base (INTVAL (operands[6]));
15352 mod_f = memmodel_base (INTVAL (operands[7]));
15353 orig_mode = mode = GET_MODE (mem);
15354
15355 mask = shift = NULL_RTX;
15356 if (mode == QImode || mode == HImode)
15357 {
15358 /* Before power8, we didn't have access to lbarx/lharx, so generate a
15359 lwarx and use shift/mask operations. With power8, we need to do the
15360 comparison in SImode, but the store is still done in QI/HImode. */
15361 oldval = convert_modes (SImode, mode, oldval, 1);
15362
15363 if (!TARGET_SYNC_HI_QI)
15364 {
15365 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15366
15367 /* Shift and mask OLDVAL into position within the word. */
15368 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15369 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15370
15371 /* Shift and mask NEWVAL into position within the word. */
15372 newval = convert_modes (SImode, mode, newval, 1);
15373 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15374 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15375 }
15376
15377 /* Prepare to adjust the return value. */
15378 retval = gen_reg_rtx (SImode);
15379 mode = SImode;
15380 }
15381 else if (reg_overlap_mentioned_p (retval, oldval))
15382 oldval = copy_to_reg (oldval);
15383
15384 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15385 oldval = copy_to_mode_reg (mode, oldval);
15386
15387 if (reg_overlap_mentioned_p (retval, newval))
15388 newval = copy_to_reg (newval);
15389
15390 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15391
15392 label1 = NULL_RTX;
15393 if (!is_weak)
15394 {
15395 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15396 emit_label (XEXP (label1, 0));
15397 }
15398 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15399
15400 emit_load_locked (mode, retval, mem);
15401
15402 x = retval;
15403 if (mask)
15404 x = expand_simple_binop (SImode, AND, retval, mask,
15405 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15406
15407 cond = gen_reg_rtx (CCmode);
15408 /* If we have TImode, synthesize a comparison. */
15409 if (mode != TImode)
15410 x = gen_rtx_COMPARE (CCmode, x, oldval);
15411 else
15412 {
15413 rtx xor1_result = gen_reg_rtx (DImode);
15414 rtx xor2_result = gen_reg_rtx (DImode);
15415 rtx or_result = gen_reg_rtx (DImode);
15416 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15417 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15418 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15419 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15420
15421 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15422 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15423 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15424 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15425 }
15426
15427 emit_insn (gen_rtx_SET (cond, x));
15428
15429 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15430 emit_unlikely_jump (x, label2);
15431
15432 x = newval;
15433 if (mask)
15434 x = rs6000_mask_atomic_subword (retval, newval, mask);
15435
15436 emit_store_conditional (orig_mode, cond, mem, x);
15437
15438 if (!is_weak)
15439 {
15440 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15441 emit_unlikely_jump (x, label1);
15442 }
15443
15444 if (!is_mm_relaxed (mod_f))
15445 emit_label (XEXP (label2, 0));
15446
15447 rs6000_post_atomic_barrier (mod_s);
15448
15449 if (is_mm_relaxed (mod_f))
15450 emit_label (XEXP (label2, 0));
15451
15452 if (shift)
15453 rs6000_finish_atomic_subword (operands[1], retval, shift);
15454 else if (mode != GET_MODE (operands[1]))
15455 convert_move (operands[1], retval, 1);
15456
15457 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15458 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15459 emit_insn (gen_rtx_SET (boolval, x));
15460 }
15461
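/* Illustrative sketch (not from the original sources; register numbers and
   labels are hypothetical): a strong SImode compare-and-swap with seq_cst
   ordering expands to roughly

	sync
   .L1:	lwarx r9,0,r3		; load-locked the current value
	cmpw cr0,r9,r4		; compare against OLDVAL
	bne- cr0,.L2		; mismatch: fail
	stwcx. r5,0,r3		; try to store NEWVAL
	bne- cr0,.L1		; lost the reservation: retry
   .L2:	isync

   with CR0 left holding EQ on success.  */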
15462 /* Expand an atomic exchange operation. */
15463
15464 void
15465 rs6000_expand_atomic_exchange (rtx operands[])
15466 {
15467 rtx retval, mem, val, cond;
15468 machine_mode mode;
15469 enum memmodel model;
15470 rtx label, x, mask, shift;
15471
15472 retval = operands[0];
15473 mem = operands[1];
15474 val = operands[2];
15475 model = memmodel_base (INTVAL (operands[3]));
15476 mode = GET_MODE (mem);
15477
15478 mask = shift = NULL_RTX;
15479 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15480 {
15481 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15482
15483 /* Shift and mask VAL into position within the word. */
15484 val = convert_modes (SImode, mode, val, 1);
15485 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15486 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15487
15488 /* Prepare to adjust the return value. */
15489 retval = gen_reg_rtx (SImode);
15490 mode = SImode;
15491 }
15492
15493 mem = rs6000_pre_atomic_barrier (mem, model);
15494
15495 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15496 emit_label (XEXP (label, 0));
15497
15498 emit_load_locked (mode, retval, mem);
15499
15500 x = val;
15501 if (mask)
15502 x = rs6000_mask_atomic_subword (retval, val, mask);
15503
15504 cond = gen_reg_rtx (CCmode);
15505 emit_store_conditional (mode, cond, mem, x);
15506
15507 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15508 emit_unlikely_jump (x, label);
15509
15510 rs6000_post_atomic_barrier (model);
15511
15512 if (shift)
15513 rs6000_finish_atomic_subword (operands[0], retval, shift);
15514 }
15515
15516 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15517 to perform. MEM is the memory on which to operate. VAL is the second
15518 operand of the binary operator. BEFORE and AFTER are optional locations to
15519 return the value of MEM either before or after the operation. MODEL_RTX
15520 is a CONST_INT containing the memory model to use. */
15521
15522 void
15523 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15524 rtx orig_before, rtx orig_after, rtx model_rtx)
15525 {
15526 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15527 machine_mode mode = GET_MODE (mem);
15528 machine_mode store_mode = mode;
15529 rtx label, x, cond, mask, shift;
15530 rtx before = orig_before, after = orig_after;
15531
15532 mask = shift = NULL_RTX;
15533 /* On power8, we want to use SImode for the operation. On previous systems,
15534 do the operation on the containing SImode word and use shift/mask to get
15535 the proper byte or halfword. */
15536 if (mode == QImode || mode == HImode)
15537 {
15538 if (TARGET_SYNC_HI_QI)
15539 {
15540 val = convert_modes (SImode, mode, val, 1);
15541
15542 /* Prepare to adjust the return value. */
15543 before = gen_reg_rtx (SImode);
15544 if (after)
15545 after = gen_reg_rtx (SImode);
15546 mode = SImode;
15547 }
15548 else
15549 {
15550 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15551
15552 /* Shift and mask VAL into position within the word. */
15553 val = convert_modes (SImode, mode, val, 1);
15554 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15555 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15556
15557 switch (code)
15558 {
15559 case IOR:
15560 case XOR:
15561 /* We've already zero-extended VAL. That is sufficient to
15562 make certain that it does not affect other bits. */
15563 mask = NULL;
15564 break;
15565
15566 case AND:
15567 /* If we make certain that all of the other bits in VAL are
15568 set, that will be sufficient to not affect other bits. */
15569 x = gen_rtx_NOT (SImode, mask);
15570 x = gen_rtx_IOR (SImode, x, val);
15571 emit_insn (gen_rtx_SET (val, x));
15572 mask = NULL;
15573 break;
15574
15575 case NOT:
15576 case PLUS:
15577 case MINUS:
15578 /* These will all affect bits outside the field and need
15579 adjustment via MASK within the loop. */
15580 break;
15581
15582 default:
15583 gcc_unreachable ();
15584 }
15585
15586 /* Prepare to adjust the return value. */
15587 before = gen_reg_rtx (SImode);
15588 if (after)
15589 after = gen_reg_rtx (SImode);
15590 store_mode = mode = SImode;
15591 }
15592 }
15593
15594 mem = rs6000_pre_atomic_barrier (mem, model);
15595
15596 label = gen_label_rtx ();
15597 emit_label (label);
15598 label = gen_rtx_LABEL_REF (VOIDmode, label);
15599
15600 if (before == NULL_RTX)
15601 before = gen_reg_rtx (mode);
15602
15603 emit_load_locked (mode, before, mem);
15604
15605 if (code == NOT)
15606 {
15607 x = expand_simple_binop (mode, AND, before, val,
15608 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15609 after = expand_simple_unop (mode, NOT, x, after, 1);
15610 }
15611 else
15612 {
15613 after = expand_simple_binop (mode, code, before, val,
15614 after, 1, OPTAB_LIB_WIDEN);
15615 }
15616
15617 x = after;
15618 if (mask)
15619 {
15620 x = expand_simple_binop (SImode, AND, after, mask,
15621 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15622 x = rs6000_mask_atomic_subword (before, x, mask);
15623 }
15624 else if (store_mode != mode)
15625 x = convert_modes (store_mode, mode, x, 1);
15626
15627 cond = gen_reg_rtx (CCmode);
15628 emit_store_conditional (store_mode, cond, mem, x);
15629
15630 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15631 emit_unlikely_jump (x, label);
15632
15633 rs6000_post_atomic_barrier (model);
15634
15635 if (shift)
15636 {
15637 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
15638 then do the calculations in a SImode register. */
15639 if (orig_before)
15640 rs6000_finish_atomic_subword (orig_before, before, shift);
15641 if (orig_after)
15642 rs6000_finish_atomic_subword (orig_after, after, shift);
15643 }
15644 else if (store_mode != mode)
15645 {
15646 /* QImode/HImode on machines with lbarx/lharx where we do the native
15647 operation and then do the calculations in a SImode register. */
15648 if (orig_before)
15649 convert_move (orig_before, before, 1);
15650 if (orig_after)
15651 convert_move (orig_after, after, 1);
15652 }
15653 else if (orig_after && after != orig_after)
15654 emit_move_insn (orig_after, after);
15655 }
15656
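/* Illustrative sketch (not from the original sources; register numbers and
   labels are hypothetical): a relaxed SImode __atomic_fetch_add expands to
   the usual load-locked/store-conditional loop, roughly

   .L1:	lwarx r9,0,r3		; BEFORE value
	add r10,r9,r4		; AFTER value
	stwcx. r10,0,r3
	bne- cr0,.L1

   with no sync/isync barriers because the model is relaxed.  */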
15657 /* Emit instructions to move SRC to DST. Called by splitters for
15658 multi-register moves. It will emit at most one instruction for
15659 each register that is accessed; that is, it won't emit li/lis pairs
15660 (or equivalent for 64-bit code). One of SRC or DST must be a hard
15661 register. */
15662
15663 void
15664 rs6000_split_multireg_move (rtx dst, rtx src)
15665 {
15666 /* The register number of the first register being moved. */
15667 int reg;
15668 /* The mode that is to be moved. */
15669 machine_mode mode;
15670 /* The mode that the move is being done in, and its size. */
15671 machine_mode reg_mode;
15672 int reg_mode_size;
15673 /* The number of registers that will be moved. */
15674 int nregs;
15675
15676 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
15677 mode = GET_MODE (dst);
15678 nregs = hard_regno_nregs (reg, mode);
15679 if (FP_REGNO_P (reg))
15680 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
15681 (TARGET_HARD_FLOAT ? DFmode : SFmode);
15682 else if (ALTIVEC_REGNO_P (reg))
15683 reg_mode = V16QImode;
15684 else
15685 reg_mode = word_mode;
15686 reg_mode_size = GET_MODE_SIZE (reg_mode);
15687
15688 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
15689
15690 /* TDmode residing in FP registers is special, since the ISA requires that
15691 the lower-numbered word of a register pair is always the most significant
15692 word, even in little-endian mode. This does not match the usual subreg
15693 semantics, so we cannot use simplify_gen_subreg in those cases. Access
15694 the appropriate constituent registers "by hand" in little-endian mode.
15695
15696 Note we do not need to check for destructive overlap here since TDmode
15697 can only reside in even/odd register pairs. */
15698 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
15699 {
15700 rtx p_src, p_dst;
15701 int i;
15702
15703 for (i = 0; i < nregs; i++)
15704 {
15705 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
15706 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
15707 else
15708 p_src = simplify_gen_subreg (reg_mode, src, mode,
15709 i * reg_mode_size);
15710
15711 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
15712 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
15713 else
15714 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
15715 i * reg_mode_size);
15716
15717 emit_insn (gen_rtx_SET (p_dst, p_src));
15718 }
15719
15720 return;
15721 }
15722
15723 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
15724 {
15725 /* Move register range backwards, if we might have destructive
15726 overlap. */
15727 int i;
15728 for (i = nregs - 1; i >= 0; i--)
15729 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15730 i * reg_mode_size),
15731 simplify_gen_subreg (reg_mode, src, mode,
15732 i * reg_mode_size)));
15733 }
15734 else
15735 {
15736 int i;
15737 int j = -1;
15738 bool used_update = false;
15739 rtx restore_basereg = NULL_RTX;
15740
15741 if (MEM_P (src) && INT_REGNO_P (reg))
15742 {
15743 rtx breg;
15744
15745 if (GET_CODE (XEXP (src, 0)) == PRE_INC
15746 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
15747 {
15748 rtx delta_rtx;
15749 breg = XEXP (XEXP (src, 0), 0);
15750 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
15751 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
15752 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
15753 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15754 src = replace_equiv_address (src, breg);
15755 }
15756 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
15757 {
15758 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
15759 {
15760 rtx basereg = XEXP (XEXP (src, 0), 0);
15761 if (TARGET_UPDATE)
15762 {
15763 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
15764 emit_insn (gen_rtx_SET (ndst,
15765 gen_rtx_MEM (reg_mode,
15766 XEXP (src, 0))));
15767 used_update = true;
15768 }
15769 else
15770 emit_insn (gen_rtx_SET (basereg,
15771 XEXP (XEXP (src, 0), 1)));
15772 src = replace_equiv_address (src, basereg);
15773 }
15774 else
15775 {
15776 rtx basereg = gen_rtx_REG (Pmode, reg);
15777 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
15778 src = replace_equiv_address (src, basereg);
15779 }
15780 }
15781
15782 breg = XEXP (src, 0);
15783 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
15784 breg = XEXP (breg, 0);
15785
15786 /* If the base register we are using to address memory is
15787 also a destination reg, then change that register last. */
15788 if (REG_P (breg)
15789 && REGNO (breg) >= REGNO (dst)
15790 && REGNO (breg) < REGNO (dst) + nregs)
15791 j = REGNO (breg) - REGNO (dst);
15792 }
15793 else if (MEM_P (dst) && INT_REGNO_P (reg))
15794 {
15795 rtx breg;
15796
15797 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
15798 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
15799 {
15800 rtx delta_rtx;
15801 breg = XEXP (XEXP (dst, 0), 0);
15802 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
15803 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
15804 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
15805
15806 /* We have to update the breg before doing the store.
15807 Use store with update, if available. */
15808
15809 if (TARGET_UPDATE)
15810 {
15811 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15812 emit_insn (TARGET_32BIT
15813 ? (TARGET_POWERPC64
15814 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
15815 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
15816 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
15817 used_update = true;
15818 }
15819 else
15820 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15821 dst = replace_equiv_address (dst, breg);
15822 }
15823 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
15824 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
15825 {
15826 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
15827 {
15828 rtx basereg = XEXP (XEXP (dst, 0), 0);
15829 if (TARGET_UPDATE)
15830 {
15831 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15832 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
15833 XEXP (dst, 0)),
15834 nsrc));
15835 used_update = true;
15836 }
15837 else
15838 emit_insn (gen_rtx_SET (basereg,
15839 XEXP (XEXP (dst, 0), 1)));
15840 dst = replace_equiv_address (dst, basereg);
15841 }
15842 else
15843 {
15844 rtx basereg = XEXP (XEXP (dst, 0), 0);
15845 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
15846 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
15847 && REG_P (basereg)
15848 && REG_P (offsetreg)
15849 && REGNO (basereg) != REGNO (offsetreg));
15850 if (REGNO (basereg) == 0)
15851 {
15852 rtx tmp = offsetreg;
15853 offsetreg = basereg;
15854 basereg = tmp;
15855 }
15856 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
15857 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
15858 dst = replace_equiv_address (dst, basereg);
15859 }
15860 }
15861 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
15862 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
15863 }
15864
15865 for (i = 0; i < nregs; i++)
15866 {
15867 /* Calculate index to next subword. */
15868 ++j;
15869 if (j == nregs)
15870 j = 0;
15871
15872 /* If compiler already emitted move of first word by
15873 store with update, no need to do anything. */
15874 if (j == 0 && used_update)
15875 continue;
15876
15877 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15878 j * reg_mode_size),
15879 simplify_gen_subreg (reg_mode, src, mode,
15880 j * reg_mode_size)));
15881 }
15882 if (restore_basereg != NULL_RTX)
15883 emit_insn (restore_basereg);
15884 }
15885 }
15886
15887 static GTY(()) alias_set_type TOC_alias_set = -1;
15888
15889 alias_set_type
15890 get_TOC_alias_set (void)
15891 {
15892 if (TOC_alias_set == -1)
15893 TOC_alias_set = new_alias_set ();
15894 return TOC_alias_set;
15895 }
15896
15897 /* The mode the ABI uses for a word. This is not the same as word_mode
15898 for -m32 -mpowerpc64. This is used to implement various target hooks. */
15899
15900 static scalar_int_mode
15901 rs6000_abi_word_mode (void)
15902 {
15903 return TARGET_32BIT ? SImode : DImode;
15904 }
15905
15906 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
15907 static char *
15908 rs6000_offload_options (void)
15909 {
15910 if (TARGET_64BIT)
15911 return xstrdup ("-foffload-abi=lp64");
15912 else
15913 return xstrdup ("-foffload-abi=ilp32");
15914 }
15915
15916 \f
15917 /* A quick summary of the various types of 'constant-pool tables'
15918 under PowerPC:
15919
15920 Target      Flags            Name             One table per
15921 AIX         (none)           AIX TOC          object file
15922 AIX         -mfull-toc       AIX TOC          object file
15923 AIX         -mminimal-toc    AIX minimal TOC  translation unit
15924 SVR4/EABI   (none)           SVR4 SDATA       object file
15925 SVR4/EABI   -fpic            SVR4 pic         object file
15926 SVR4/EABI   -fPIC            SVR4 PIC         translation unit
15927 SVR4/EABI   -mrelocatable    EABI TOC         function
15928 SVR4/EABI   -maix            AIX TOC          object file
15929 SVR4/EABI   -maix -mminimal-toc
15930                              AIX minimal TOC  translation unit
15931 
15932 Name             Reg.  Set by  entries  contains:
15933                                made by  addrs?   fp?      sum?
15934 
15935 AIX TOC          2     crt0    as       Y        option   option
15936 AIX minimal TOC  30    prolog  gcc      Y        Y        option
15937 SVR4 SDATA       13    crt0    gcc      N        Y        N
15938 SVR4 pic         30    prolog  ld       Y        not yet  N
15939 SVR4 PIC         30    prolog  gcc      Y        option   option
15940 EABI TOC         30    prolog  gcc      Y        option   option
15941
15942 */
15943
15944 /* Hash functions for the hash table. */
15945
15946 static unsigned
15947 rs6000_hash_constant (rtx k)
15948 {
15949 enum rtx_code code = GET_CODE (k);
15950 machine_mode mode = GET_MODE (k);
15951 unsigned result = (code << 3) ^ mode;
15952 const char *format;
15953 int flen, fidx;
15954
15955 format = GET_RTX_FORMAT (code);
15956 flen = strlen (format);
15957 fidx = 0;
15958
15959 switch (code)
15960 {
15961 case LABEL_REF:
15962 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
15963
15964 case CONST_WIDE_INT:
15965 {
15966 int i;
15967 flen = CONST_WIDE_INT_NUNITS (k);
15968 for (i = 0; i < flen; i++)
15969 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
15970 return result;
15971 }
15972
15973 case CONST_DOUBLE:
15974 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
15975
15976 case CODE_LABEL:
15977 fidx = 3;
15978 break;
15979
15980 default:
15981 break;
15982 }
15983
15984 for (; fidx < flen; fidx++)
15985 switch (format[fidx])
15986 {
15987 case 's':
15988 {
15989 unsigned i, len;
15990 const char *str = XSTR (k, fidx);
15991 len = strlen (str);
15992 result = result * 613 + len;
15993 for (i = 0; i < len; i++)
15994 result = result * 613 + (unsigned) str[i];
15995 break;
15996 }
15997 case 'u':
15998 case 'e':
15999 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16000 break;
16001 case 'i':
16002 case 'n':
16003 result = result * 613 + (unsigned) XINT (k, fidx);
16004 break;
16005 case 'w':
16006 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16007 result = result * 613 + (unsigned) XWINT (k, fidx);
16008 else
16009 {
16010 size_t i;
16011 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16012 result = result * 613 + (unsigned) (XWINT (k, fidx)
16013 >> CHAR_BIT * i);
16014 }
16015 break;
16016 case '0':
16017 break;
16018 default:
16019 gcc_unreachable ();
16020 }
16021
16022 return result;
16023 }
16024
16025 hashval_t
16026 toc_hasher::hash (toc_hash_struct *thc)
16027 {
16028 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16029 }
16030
16031 /* Compare H1 and H2 for equivalence. */
16032
16033 bool
16034 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16035 {
16036 rtx r1 = h1->key;
16037 rtx r2 = h2->key;
16038
16039 if (h1->key_mode != h2->key_mode)
16040 return 0;
16041
16042 return rtx_equal_p (r1, r2);
16043 }
16044
16045 /* These are the names given by the C++ front-end to vtables and
16046 vtable-like objects. Ideally, this logic should not be here;
16047 instead, there should be some programmatic way of inquiring as
16048 to whether or not an object is a vtable. */
16049
16050 #define VTABLE_NAME_P(NAME) \
16051 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
16052 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
16053 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
16054 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
16055 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
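/* With the Itanium C++ ABI mangling, the prefixes above match names
   such as "_ZTV3Foo" (vtable), "_ZTT3Foo" (VTT), "_ZTI3Foo" (typeinfo)
   and "_ZTC..." (construction vtable); "Foo" is just an illustrative
   class name.  "_vt." is the old g++ 2.x vtable prefix.  */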
16056
16057 #ifdef NO_DOLLAR_IN_LABEL
16058 /* Return a GGC-allocated character string translating dollar signs in
16059 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16060
16061 const char *
16062 rs6000_xcoff_strip_dollar (const char *name)
16063 {
16064 char *strip, *p;
16065 const char *q;
16066 size_t len;
16067
16068 q = (const char *) strchr (name, '$');
16069
16070 if (q == 0 || q == name)
16071 return name;
16072
16073 len = strlen (name);
16074 strip = XALLOCAVEC (char, len + 1);
16075 strcpy (strip, name);
16076 p = strip + (q - name);
16077 while (p)
16078 {
16079 *p = '_';
16080 p = strchr (p + 1, '$');
16081 }
16082
16083 return ggc_alloc_string (strip, len);
16084 }
16085 #endif
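/* A minimal usage sketch with a hypothetical name:
   rs6000_xcoff_strip_dollar ("alias$entry") returns "alias_entry".
   A name with no '$', or one whose first character is '$', is
   returned unchanged.  */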
16086
16087 void
16088 rs6000_output_symbol_ref (FILE *file, rtx x)
16089 {
16090 const char *name = XSTR (x, 0);
16091
16092 /* Currently C++ toc references to vtables can be emitted before it
16093 is decided whether the vtable is public or private. If this is
16094 the case, then the linker will eventually complain that there is
16095 a reference to an unknown section. Thus, for vtables only,
16096 we emit the TOC reference to reference the identifier and not the
16097 symbol. */
16098 if (VTABLE_NAME_P (name))
16099 {
16100 RS6000_OUTPUT_BASENAME (file, name);
16101 }
16102 else
16103 assemble_name (file, name);
16104 }
16105
16106 /* Output a TOC entry. We derive the entry name from what is being
16107 written. */
16108
16109 void
16110 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16111 {
16112 char buf[256];
16113 const char *name = buf;
16114 rtx base = x;
16115 HOST_WIDE_INT offset = 0;
16116
16117 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16118
16119 /* When the linker won't eliminate them, don't output duplicate
16120 TOC entries (this happens on AIX if there is any kind of TOC,
16121 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16122 CODE_LABELs. */
16123 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16124 {
16125 struct toc_hash_struct *h;
16126
16127 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16128 time because GGC is not initialized at that point. */
16129 if (toc_hash_table == NULL)
16130 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16131
16132 h = ggc_alloc<toc_hash_struct> ();
16133 h->key = x;
16134 h->key_mode = mode;
16135 h->labelno = labelno;
16136
16137 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16138 if (*found == NULL)
16139 *found = h;
16140 else /* This is indeed a duplicate.
16141 Set this label equal to that label. */
16142 {
16143 fputs ("\t.set ", file);
16144 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16145 fprintf (file, "%d,", labelno);
16146 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16147 fprintf (file, "%d\n", ((*found)->labelno));
16148
16149 #ifdef HAVE_AS_TLS
16150 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16151 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16152 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16153 {
16154 fputs ("\t.set ", file);
16155 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16156 fprintf (file, "%d,", labelno);
16157 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16158 fprintf (file, "%d\n", ((*found)->labelno));
16159 }
16160 #endif
16161 return;
16162 }
16163 }
16164
16165 /* If we're going to put a double constant in the TOC, make sure it's
16166 aligned properly when strict alignment is on. */
16167 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16168 && STRICT_ALIGNMENT
16169 && GET_MODE_BITSIZE (mode) >= 64
16170 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16171 ASM_OUTPUT_ALIGN (file, 3);
16173
16174 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16175
16176 /* Handle FP constants specially. Note that if we have a minimal
16177 TOC, things we put here aren't actually in the TOC, so we can allow
16178 FP constants. */
16179 if (CONST_DOUBLE_P (x)
16180 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16181 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16182 {
16183 long k[4];
16184
16185 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16186 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16187 else
16188 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16189
16190 if (TARGET_64BIT)
16191 {
16192 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16193 fputs (DOUBLE_INT_ASM_OP, file);
16194 else
16195 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16196 k[0] & 0xffffffff, k[1] & 0xffffffff,
16197 k[2] & 0xffffffff, k[3] & 0xffffffff);
16198 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16199 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16200 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16201 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16202 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16203 return;
16204 }
16205 else
16206 {
16207 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16208 fputs ("\t.long ", file);
16209 else
16210 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16211 k[0] & 0xffffffff, k[1] & 0xffffffff,
16212 k[2] & 0xffffffff, k[3] & 0xffffffff);
16213 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16214 k[0] & 0xffffffff, k[1] & 0xffffffff,
16215 k[2] & 0xffffffff, k[3] & 0xffffffff);
16216 return;
16217 }
16218 }
16219 else if (CONST_DOUBLE_P (x)
16220 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16221 {
16222 long k[2];
16223
16224 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16225 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16226 else
16227 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16228
16229 if (TARGET_64BIT)
16230 {
16231 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16232 fputs (DOUBLE_INT_ASM_OP, file);
16233 else
16234 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16235 k[0] & 0xffffffff, k[1] & 0xffffffff);
16236 fprintf (file, "0x%lx%08lx\n",
16237 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16238 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16239 return;
16240 }
16241 else
16242 {
16243 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16244 fputs ("\t.long ", file);
16245 else
16246 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16247 k[0] & 0xffffffff, k[1] & 0xffffffff);
16248 fprintf (file, "0x%lx,0x%lx\n",
16249 k[0] & 0xffffffff, k[1] & 0xffffffff);
16250 return;
16251 }
16252 }
16253 else if (CONST_DOUBLE_P (x)
16254 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16255 {
16256 long l;
16257
16258 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16259 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16260 else
16261 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16262
16263 if (TARGET_64BIT)
16264 {
16265 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16266 fputs (DOUBLE_INT_ASM_OP, file);
16267 else
16268 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16269 if (WORDS_BIG_ENDIAN)
16270 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16271 else
16272 fprintf (file, "0x%lx\n", l & 0xffffffff);
16273 return;
16274 }
16275 else
16276 {
16277 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16278 fputs ("\t.long ", file);
16279 else
16280 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16281 fprintf (file, "0x%lx\n", l & 0xffffffff);
16282 return;
16283 }
16284 }
16285 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16286 {
16287 unsigned HOST_WIDE_INT low;
16288 HOST_WIDE_INT high;
16289
16290 low = INTVAL (x) & 0xffffffff;
16291 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16292
16293 /* TOC entries are always Pmode-sized, so when big-endian
16294 smaller integer constants in the TOC need to be padded.
16295 (This is still a win over putting the constants in
16296 a separate constant pool, because then we'd have
16297 to have both a TOC entry _and_ the actual constant.)
16298
16299 For a 32-bit target, CONST_INT values are loaded and shifted
16300 entirely within `low' and can be stored in one TOC entry. */
16301
16302 /* It would be easy to make this work, but it doesn't now. */
16303 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16304
16305 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16306 {
16307 low |= high << 32;
16308 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16309 high = (HOST_WIDE_INT) low >> 32;
16310 low &= 0xffffffff;
16311 }
16312
16313 if (TARGET_64BIT)
16314 {
16315 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16316 fputs (DOUBLE_INT_ASM_OP, file);
16317 else
16318 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16319 (long) high & 0xffffffff, (long) low & 0xffffffff);
16320 fprintf (file, "0x%lx%08lx\n",
16321 (long) high & 0xffffffff, (long) low & 0xffffffff);
16322 return;
16323 }
16324 else
16325 {
16326 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16327 {
16328 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16329 fputs ("\t.long ", file);
16330 else
16331 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16332 (long) high & 0xffffffff, (long) low & 0xffffffff);
16333 fprintf (file, "0x%lx,0x%lx\n",
16334 (long) high & 0xffffffff, (long) low & 0xffffffff);
16335 }
16336 else
16337 {
16338 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16339 fputs ("\t.long ", file);
16340 else
16341 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16342 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16343 }
16344 return;
16345 }
16346 }
16347
16348 if (GET_CODE (x) == CONST)
16349 {
16350 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16351 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16352
16353 base = XEXP (XEXP (x, 0), 0);
16354 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16355 }
16356
16357 switch (GET_CODE (base))
16358 {
16359 case SYMBOL_REF:
16360 name = XSTR (base, 0);
16361 break;
16362
16363 case LABEL_REF:
16364 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16365 CODE_LABEL_NUMBER (XEXP (base, 0)));
16366 break;
16367
16368 case CODE_LABEL:
16369 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16370 break;
16371
16372 default:
16373 gcc_unreachable ();
16374 }
16375
16376 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16377 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16378 else
16379 {
16380 fputs ("\t.tc ", file);
16381 RS6000_OUTPUT_BASENAME (file, name);
16382
16383 if (offset < 0)
16384 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16385 else if (offset)
16386 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16387
16388 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16389 after other TOC symbols, reducing overflow of small TOC access
16390 to [TC] symbols. */
16391 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16392 ? "[TE]," : "[TC],", file);
16393 }
16394
16395 /* Currently C++ toc references to vtables can be emitted before it
16396 is decided whether the vtable is public or private. If this is
16397 the case, then the linker will eventually complain that there is
16398 a TOC reference to an unknown section. Thus, for vtables only,
16399 we emit the TOC reference to reference the symbol and not the
16400 section. */
16401 if (VTABLE_NAME_P (name))
16402 {
16403 RS6000_OUTPUT_BASENAME (file, name);
16404 if (offset < 0)
16405 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16406 else if (offset > 0)
16407 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16408 }
16409 else
16410 output_addr_const (file, x);
16411
16412 #if HAVE_AS_TLS
16413 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16414 {
16415 switch (SYMBOL_REF_TLS_MODEL (base))
16416 {
16417 case 0:
16418 break;
16419 case TLS_MODEL_LOCAL_EXEC:
16420 fputs ("@le", file);
16421 break;
16422 case TLS_MODEL_INITIAL_EXEC:
16423 fputs ("@ie", file);
16424 break;
16425 /* Use global-dynamic for local-dynamic. */
16426 case TLS_MODEL_GLOBAL_DYNAMIC:
16427 case TLS_MODEL_LOCAL_DYNAMIC:
16428 putc ('\n', file);
16429 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16430 fputs ("\t.tc .", file);
16431 RS6000_OUTPUT_BASENAME (file, name);
16432 fputs ("[TC],", file);
16433 output_addr_const (file, x);
16434 fputs ("@m", file);
16435 break;
16436 default:
16437 gcc_unreachable ();
16438 }
16439 }
16440 #endif
16441
16442 putc ('\n', file);
16443 }
16444 \f
16445 /* Output an assembler pseudo-op to write an ASCII string of N characters
16446 starting at P to FILE.
16447
16448 On the RS/6000, we have to do this using the .byte operation and
16449 write out special characters outside the quoted string.
16450 Also, the assembler is broken; very long strings are truncated,
16451 so we must artificially break them up early. */
16452
16453 void
16454 output_ascii (FILE *file, const char *p, int n)
16455 {
16456 char c;
16457 int i, count_string;
16458 const char *for_string = "\t.byte \"";
16459 const char *for_decimal = "\t.byte ";
16460 const char *to_close = NULL;
16461
16462 count_string = 0;
16463 for (i = 0; i < n; i++)
16464 {
16465 c = *p++;
16466 if (c >= ' ' && c < 0177)
16467 {
16468 if (for_string)
16469 fputs (for_string, file);
16470 putc (c, file);
16471
16472 /* Write two quotes to get one. */
16473 if (c == '"')
16474 {
16475 putc (c, file);
16476 ++count_string;
16477 }
16478
16479 for_string = NULL;
16480 for_decimal = "\"\n\t.byte ";
16481 to_close = "\"\n";
16482 ++count_string;
16483
16484 if (count_string >= 512)
16485 {
16486 fputs (to_close, file);
16487
16488 for_string = "\t.byte \"";
16489 for_decimal = "\t.byte ";
16490 to_close = NULL;
16491 count_string = 0;
16492 }
16493 }
16494 else
16495 {
16496 if (for_decimal)
16497 fputs (for_decimal, file);
16498 fprintf (file, "%d", c);
16499
16500 for_string = "\n\t.byte \"";
16501 for_decimal = ", ";
16502 to_close = "\n";
16503 count_string = 0;
16504 }
16505 }
16506
16507 /* Now close the string if we have written one. Then end the line. */
16508 if (to_close)
16509 fputs (to_close, file);
16510 }
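/* A short sketch of the output, assuming a hypothetical 3-byte input:
   output_ascii (file, "Hi\n", 3) emits

       .byte "Hi"
       .byte 10

   Printable runs are quoted, other bytes are written as decimal
   values, and quoted runs are closed every 512 characters to stay
   within the assembler's limits.  */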
16511 \f
16512 /* Generate a unique section name for FILENAME for a section type
16513 represented by SECTION_DESC. Output goes into BUF.
16514
16515 SECTION_DESC can be any string, as long as it is different for each
16516 possible section type.
16517
16518 We name the section in the same manner as xlc. The name begins with an
16519 underscore followed by the filename (after stripping any leading directory
16520 names) with the last period replaced by the string SECTION_DESC. If
16521 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16522 the name. */
16523
16524 void
16525 rs6000_gen_section_name (char **buf, const char *filename,
16526 const char *section_desc)
16527 {
16528 const char *q, *after_last_slash, *last_period = 0;
16529 char *p;
16530 int len;
16531
16532 after_last_slash = filename;
16533 for (q = filename; *q; q++)
16534 {
16535 if (*q == '/')
16536 after_last_slash = q + 1;
16537 else if (*q == '.')
16538 last_period = q;
16539 }
16540
16541 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16542 *buf = (char *) xmalloc (len);
16543
16544 p = *buf;
16545 *p++ = '_';
16546
16547 for (q = after_last_slash; *q; q++)
16548 {
16549 if (q == last_period)
16550 {
16551 strcpy (p, section_desc);
16552 p += strlen (section_desc);
16553 break;
16554 }
16555
16556 else if (ISALNUM (*q))
16557 *p++ = *q;
16558 }
16559
16560 if (last_period == 0)
16561 strcpy (p, section_desc);
16562 else
16563 *p = '\0';
16564 }
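/* Illustrative example with hypothetical arguments: for FILENAME
   "src/foo.c" and SECTION_DESC "XD" the buffer receives "_fooXD";
   the directory prefix and everything from the last period onward
   are dropped, and non-alphanumeric characters are skipped.  */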
16565 \f
16566 /* Emit profile function. */
16567
16568 void
16569 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
16570 {
16571 /* Non-standard profiling for kernels, which just saves LR then calls
16572 _mcount without worrying about arg saves. The idea is to change
16573 the function prologue as little as possible as it isn't easy to
16574 account for arg save/restore code added just for _mcount. */
16575 if (TARGET_PROFILE_KERNEL)
16576 return;
16577
16578 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
16579 {
16580 #ifndef NO_PROFILE_COUNTERS
16581 # define NO_PROFILE_COUNTERS 0
16582 #endif
16583 if (NO_PROFILE_COUNTERS)
16584 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16585 LCT_NORMAL, VOIDmode);
16586 else
16587 {
16588 char buf[30];
16589 const char *label_name;
16590 rtx fun;
16591
16592 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16593 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
16594 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
16595
16596 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16597 LCT_NORMAL, VOIDmode, fun, Pmode);
16598 }
16599 }
16600 else if (DEFAULT_ABI == ABI_DARWIN)
16601 {
16602 const char *mcount_name = RS6000_MCOUNT;
16603 int caller_addr_regno = LR_REGNO;
16604
16605 /* Be conservative and always set this, at least for now. */
16606 crtl->uses_pic_offset_table = 1;
16607
16608 #if TARGET_MACHO
16609 /* For PIC code, set up a stub and collect the caller's address
16610 from r0, which is where the prologue puts it. */
16611 if (MACHOPIC_INDIRECT
16612 && crtl->uses_pic_offset_table)
16613 caller_addr_regno = 0;
16614 #endif
16615 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
16616 LCT_NORMAL, VOIDmode,
16617 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
16618 }
16619 }
16620
16621 /* Write function profiler code. */
16622
16623 void
16624 output_function_profiler (FILE *file, int labelno)
16625 {
16626 char buf[100];
16627
16628 switch (DEFAULT_ABI)
16629 {
16630 default:
16631 gcc_unreachable ();
16632
16633 case ABI_V4:
16634 if (!TARGET_32BIT)
16635 {
16636 warning (0, "no profiling of 64-bit code for this ABI");
16637 return;
16638 }
16639 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16640 fprintf (file, "\tmflr %s\n", reg_names[0]);
16641 if (NO_PROFILE_COUNTERS)
16642 {
16643 asm_fprintf (file, "\tstw %s,4(%s)\n",
16644 reg_names[0], reg_names[1]);
16645 }
16646 else if (TARGET_SECURE_PLT && flag_pic)
16647 {
16648 if (TARGET_LINK_STACK)
16649 {
16650 char name[32];
16651 get_ppc476_thunk_name (name);
16652 asm_fprintf (file, "\tbl %s\n", name);
16653 }
16654 else
16655 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
16656 asm_fprintf (file, "\tstw %s,4(%s)\n",
16657 reg_names[0], reg_names[1]);
16658 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16659 asm_fprintf (file, "\taddis %s,%s,",
16660 reg_names[12], reg_names[12]);
16661 assemble_name (file, buf);
16662 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
16663 assemble_name (file, buf);
16664 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
16665 }
16666 else if (flag_pic == 1)
16667 {
16668 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
16669 asm_fprintf (file, "\tstw %s,4(%s)\n",
16670 reg_names[0], reg_names[1]);
16671 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16672 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
16673 assemble_name (file, buf);
16674 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
16675 }
16676 else if (flag_pic > 1)
16677 {
16678 asm_fprintf (file, "\tstw %s,4(%s)\n",
16679 reg_names[0], reg_names[1]);
16680 /* Now, we need to get the address of the label. */
16681 if (TARGET_LINK_STACK)
16682 {
16683 char name[32];
16684 get_ppc476_thunk_name (name);
16685 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
16686 assemble_name (file, buf);
16687 fputs ("-.\n1:", file);
16688 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16689 asm_fprintf (file, "\taddi %s,%s,4\n",
16690 reg_names[11], reg_names[11]);
16691 }
16692 else
16693 {
16694 fputs ("\tbcl 20,31,1f\n\t.long ", file);
16695 assemble_name (file, buf);
16696 fputs ("-.\n1:", file);
16697 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16698 }
16699 asm_fprintf (file, "\tlwz %s,0(%s)\n",
16700 reg_names[0], reg_names[11]);
16701 asm_fprintf (file, "\tadd %s,%s,%s\n",
16702 reg_names[0], reg_names[0], reg_names[11]);
16703 }
16704 else
16705 {
16706 asm_fprintf (file, "\tlis %s,", reg_names[12]);
16707 assemble_name (file, buf);
16708 fputs ("@ha\n", file);
16709 asm_fprintf (file, "\tstw %s,4(%s)\n",
16710 reg_names[0], reg_names[1]);
16711 asm_fprintf (file, "\tla %s,", reg_names[0]);
16712 assemble_name (file, buf);
16713 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
16714 }
16715
16716 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16717 fprintf (file, "\tbl %s%s\n",
16718 RS6000_MCOUNT, flag_pic ? "@plt" : "");
16719 break;
16720
16721 case ABI_AIX:
16722 case ABI_ELFv2:
16723 case ABI_DARWIN:
16724 /* Don't do anything, done in output_profile_hook (). */
16725 break;
16726 }
16727 }
16728
16729 \f
16730
16731 /* The following variable value is the last issued insn. */
16732
16733 static rtx_insn *last_scheduled_insn;
16734
16735 /* The following variable helps to balance the issuing of load and
16736 store instructions. */
16737
16738 static int load_store_pendulum;
16739
16740 /* The following variable helps pair divide insns during scheduling. */
16741 static int divide_cnt;
16742 /* The following variable helps pair and alternate vector and vector load
16743 insns during scheduling. */
16744 static int vec_pairing;
16745
16746
16747 /* Power4 load update and store update instructions are cracked into a
16748 load or store and an integer insn which are executed in the same cycle.
16749 Branches have their own dispatch slot which does not count against the
16750 GCC issue rate, but it changes the program flow so there are no other
16751 instructions to issue in this cycle. */
16752
16753 static int
16754 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16755 {
16756 last_scheduled_insn = insn;
16757 if (GET_CODE (PATTERN (insn)) == USE
16758 || GET_CODE (PATTERN (insn)) == CLOBBER)
16759 {
16760 cached_can_issue_more = more;
16761 return cached_can_issue_more;
16762 }
16763
16764 if (insn_terminates_group_p (insn, current_group))
16765 {
16766 cached_can_issue_more = 0;
16767 return cached_can_issue_more;
16768 }
16769
16770 /* If the insn has no reservation, don't count it against the issue rate. */
16771 if (recog_memoized (insn) < 0)
16772 return more;
16773
16774 if (rs6000_sched_groups)
16775 {
16776 if (is_microcoded_insn (insn))
16777 cached_can_issue_more = 0;
16778 else if (is_cracked_insn (insn))
16779 cached_can_issue_more = more > 2 ? more - 2 : 0;
16780 else
16781 cached_can_issue_more = more - 1;
16782
16783 return cached_can_issue_more;
16784 }
16785
16786 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16787 return 0;
16788
16789 cached_can_issue_more = more - 1;
16790 return cached_can_issue_more;
16791 }
16792
16793 static int
16794 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16795 {
16796 int r = rs6000_variable_issue_1 (insn, more);
16797 if (verbose)
16798 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16799 return r;
16800 }
16801
16802 /* Adjust the cost of a scheduling dependency. Return the new cost of
16803 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
16804
16805 static int
16806 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
16807 unsigned int)
16808 {
16809 enum attr_type attr_type;
16810
16811 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
16812 return cost;
16813
16814 switch (dep_type)
16815 {
16816 case REG_DEP_TRUE:
16817 {
16818 /* Data dependency; DEP_INSN writes a register that INSN reads
16819 some cycles later. */
16820
16821 /* Separate a load from a narrower, dependent store. */
16822 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
16823 || rs6000_tune == PROCESSOR_FUTURE)
16824 && GET_CODE (PATTERN (insn)) == SET
16825 && GET_CODE (PATTERN (dep_insn)) == SET
16826 && MEM_P (XEXP (PATTERN (insn), 1))
16827 && MEM_P (XEXP (PATTERN (dep_insn), 0))
16828 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
16829 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
16830 return cost + 14;
16831
16832 attr_type = get_attr_type (insn);
16833
16834 switch (attr_type)
16835 {
16836 case TYPE_JMPREG:
16837 /* Tell the first scheduling pass about the latency between
16838 a mtctr and bctr (and mtlr and br/blr). The first
16839 scheduling pass will not know about this latency since
16840 the mtctr instruction, which has the latency associated
16841 to it, will be generated by reload. */
16842 return 4;
16843 case TYPE_BRANCH:
16844 /* Leave some extra cycles between a compare and its
16845 dependent branch, to inhibit expensive mispredicts. */
16846 if ((rs6000_tune == PROCESSOR_PPC603
16847 || rs6000_tune == PROCESSOR_PPC604
16848 || rs6000_tune == PROCESSOR_PPC604e
16849 || rs6000_tune == PROCESSOR_PPC620
16850 || rs6000_tune == PROCESSOR_PPC630
16851 || rs6000_tune == PROCESSOR_PPC750
16852 || rs6000_tune == PROCESSOR_PPC7400
16853 || rs6000_tune == PROCESSOR_PPC7450
16854 || rs6000_tune == PROCESSOR_PPCE5500
16855 || rs6000_tune == PROCESSOR_PPCE6500
16856 || rs6000_tune == PROCESSOR_POWER4
16857 || rs6000_tune == PROCESSOR_POWER5
16858 || rs6000_tune == PROCESSOR_POWER7
16859 || rs6000_tune == PROCESSOR_POWER8
16860 || rs6000_tune == PROCESSOR_POWER9
16861 || rs6000_tune == PROCESSOR_FUTURE
16862 || rs6000_tune == PROCESSOR_CELL)
16863 && recog_memoized (dep_insn)
16864 && (INSN_CODE (dep_insn) >= 0))
16865
16866 switch (get_attr_type (dep_insn))
16867 {
16868 case TYPE_CMP:
16869 case TYPE_FPCOMPARE:
16870 case TYPE_CR_LOGICAL:
16871 return cost + 2;
16872 case TYPE_EXTS:
16873 case TYPE_MUL:
16874 if (get_attr_dot (dep_insn) == DOT_YES)
16875 return cost + 2;
16876 else
16877 break;
16878 case TYPE_SHIFT:
16879 if (get_attr_dot (dep_insn) == DOT_YES
16880 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
16881 return cost + 2;
16882 else
16883 break;
16884 default:
16885 break;
16886 }
16887 break;
16888
16889 case TYPE_STORE:
16890 case TYPE_FPSTORE:
16891 if ((rs6000_tune == PROCESSOR_POWER6)
16892 && recog_memoized (dep_insn)
16893 && (INSN_CODE (dep_insn) >= 0))
16894 {
16895
16896 if (GET_CODE (PATTERN (insn)) != SET)
16897 /* If this happens, we have to extend this to schedule
16898 optimally. Return default for now. */
16899 return cost;
16900
16901 /* Adjust the cost for the case where the value written
16902 by a fixed point operation is used as the address
16903 gen value on a store. */
16904 switch (get_attr_type (dep_insn))
16905 {
16906 case TYPE_LOAD:
16907 case TYPE_CNTLZ:
16908 {
16909 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16910 return get_attr_sign_extend (dep_insn)
16911 == SIGN_EXTEND_YES ? 6 : 4;
16912 break;
16913 }
16914 case TYPE_SHIFT:
16915 {
16916 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16917 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
16918 6 : 3;
16919 break;
16920 }
16921 case TYPE_INTEGER:
16922 case TYPE_ADD:
16923 case TYPE_LOGICAL:
16924 case TYPE_EXTS:
16925 case TYPE_INSERT:
16926 {
16927 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16928 return 3;
16929 break;
16930 }
16931 case TYPE_STORE:
16932 case TYPE_FPLOAD:
16933 case TYPE_FPSTORE:
16934 {
16935 if (get_attr_update (dep_insn) == UPDATE_YES
16936 && ! rs6000_store_data_bypass_p (dep_insn, insn))
16937 return 3;
16938 break;
16939 }
16940 case TYPE_MUL:
16941 {
16942 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16943 return 17;
16944 break;
16945 }
16946 case TYPE_DIV:
16947 {
16948 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16949 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
16950 break;
16951 }
16952 default:
16953 break;
16954 }
16955 }
16956 break;
16957
16958 case TYPE_LOAD:
16959 if ((rs6000_tune == PROCESSOR_POWER6)
16960 && recog_memoized (dep_insn)
16961 && (INSN_CODE (dep_insn) >= 0))
16962 {
16963
16964 /* Adjust the cost for the case where the value written
16965 by a fixed point instruction is used within the address
16966 gen portion of a subsequent load(u)(x) */
16967 switch (get_attr_type (dep_insn))
16968 {
16969 case TYPE_LOAD:
16970 case TYPE_CNTLZ:
16971 {
16972 if (set_to_load_agen (dep_insn, insn))
16973 return get_attr_sign_extend (dep_insn)
16974 == SIGN_EXTEND_YES ? 6 : 4;
16975 break;
16976 }
16977 case TYPE_SHIFT:
16978 {
16979 if (set_to_load_agen (dep_insn, insn))
16980 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
16981 6 : 3;
16982 break;
16983 }
16984 case TYPE_INTEGER:
16985 case TYPE_ADD:
16986 case TYPE_LOGICAL:
16987 case TYPE_EXTS:
16988 case TYPE_INSERT:
16989 {
16990 if (set_to_load_agen (dep_insn, insn))
16991 return 3;
16992 break;
16993 }
16994 case TYPE_STORE:
16995 case TYPE_FPLOAD:
16996 case TYPE_FPSTORE:
16997 {
16998 if (get_attr_update (dep_insn) == UPDATE_YES
16999 && set_to_load_agen (dep_insn, insn))
17000 return 3;
17001 break;
17002 }
17003 case TYPE_MUL:
17004 {
17005 if (set_to_load_agen (dep_insn, insn))
17006 return 17;
17007 break;
17008 }
17009 case TYPE_DIV:
17010 {
17011 if (set_to_load_agen (dep_insn, insn))
17012 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17013 break;
17014 }
17015 default:
17016 break;
17017 }
17018 }
17019 break;
17020
17021 case TYPE_FPLOAD:
17022 if ((rs6000_tune == PROCESSOR_POWER6)
17023 && get_attr_update (insn) == UPDATE_NO
17024 && recog_memoized (dep_insn)
17025 && (INSN_CODE (dep_insn) >= 0)
17026 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
17027 return 2;
17028 break;
17029 default:
17030 break;
17031 }
17032
17033 /* Fall out to return default cost. */
17034 }
17035 break;
17036
17037 case REG_DEP_OUTPUT:
17038 /* Output dependency; DEP_INSN writes a register that INSN writes some
17039 cycles later. */
17040 if ((rs6000_tune == PROCESSOR_POWER6)
17041 && recog_memoized (dep_insn)
17042 && (INSN_CODE (dep_insn) >= 0))
17043 {
17044 attr_type = get_attr_type (insn);
17045
17046 switch (attr_type)
17047 {
17048 case TYPE_FP:
17049 case TYPE_FPSIMPLE:
17050 if (get_attr_type (dep_insn) == TYPE_FP
17051 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17052 return 1;
17053 break;
17054 case TYPE_FPLOAD:
17055 if (get_attr_update (insn) == UPDATE_NO
17056 && get_attr_type (dep_insn) == TYPE_MFFGPR)
17057 return 2;
17058 break;
17059 default:
17060 break;
17061 }
17062 }
17063 /* Fall through, no cost for output dependency. */
17064 /* FALLTHRU */
17065
17066 case REG_DEP_ANTI:
17067 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17068 cycles later. */
17069 return 0;
17070
17071 default:
17072 gcc_unreachable ();
17073 }
17074
17075 return cost;
17076 }
17077
17078 /* Debug version of rs6000_adjust_cost. */
17079
17080 static int
17081 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17082 int cost, unsigned int dw)
17083 {
17084 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17085
17086 if (ret != cost)
17087 {
17088 const char *dep;
17089
17090 switch (dep_type)
17091 {
17092 default: dep = "unknown dependency"; break;
17093 case REG_DEP_TRUE: dep = "data dependency"; break;
17094 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17095 case REG_DEP_ANTI: dep = "anti dependency"; break;
17096 }
17097
17098 fprintf (stderr,
17099 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17100 "%s, insn:\n", ret, cost, dep);
17101
17102 debug_rtx (insn);
17103 }
17104
17105 return ret;
17106 }
17107
17108 /* Return true if INSN is microcoded on this processor,
17109 false otherwise. */
17110
17111 static bool
17112 is_microcoded_insn (rtx_insn *insn)
17113 {
17114 if (!insn || !NONDEBUG_INSN_P (insn)
17115 || GET_CODE (PATTERN (insn)) == USE
17116 || GET_CODE (PATTERN (insn)) == CLOBBER)
17117 return false;
17118
17119 if (rs6000_tune == PROCESSOR_CELL)
17120 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17121
17122 if (rs6000_sched_groups
17123 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17124 {
17125 enum attr_type type = get_attr_type (insn);
17126 if ((type == TYPE_LOAD
17127 && get_attr_update (insn) == UPDATE_YES
17128 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17129 || ((type == TYPE_LOAD || type == TYPE_STORE)
17130 && get_attr_update (insn) == UPDATE_YES
17131 && get_attr_indexed (insn) == INDEXED_YES)
17132 || type == TYPE_MFCR)
17133 return true;
17134 }
17135
17136 return false;
17137 }
17138
17139 /* Return true if the processor cracks INSN into 2 instructions
17140 (and it therefore occupies 2 issue slots). */
17141
17142 static bool
17143 is_cracked_insn (rtx_insn *insn)
17144 {
17145 if (!insn || !NONDEBUG_INSN_P (insn)
17146 || GET_CODE (PATTERN (insn)) == USE
17147 || GET_CODE (PATTERN (insn)) == CLOBBER)
17148 return false;
17149
17150 if (rs6000_sched_groups
17151 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17152 {
17153 enum attr_type type = get_attr_type (insn);
17154 if ((type == TYPE_LOAD
17155 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17156 && get_attr_update (insn) == UPDATE_NO)
17157 || (type == TYPE_LOAD
17158 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17159 && get_attr_update (insn) == UPDATE_YES
17160 && get_attr_indexed (insn) == INDEXED_NO)
17161 || (type == TYPE_STORE
17162 && get_attr_update (insn) == UPDATE_YES
17163 && get_attr_indexed (insn) == INDEXED_NO)
17164 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17165 && get_attr_update (insn) == UPDATE_YES)
17166 || (type == TYPE_CR_LOGICAL
17167 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17168 || (type == TYPE_EXTS
17169 && get_attr_dot (insn) == DOT_YES)
17170 || (type == TYPE_SHIFT
17171 && get_attr_dot (insn) == DOT_YES
17172 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17173 || (type == TYPE_MUL
17174 && get_attr_dot (insn) == DOT_YES)
17175 || type == TYPE_DIV
17176 || (type == TYPE_INSERT
17177 && get_attr_size (insn) == SIZE_32))
17178 return true;
17179 }
17180
17181 return false;
17182 }
17183
17184 /* Return true if INSN can be issued only from
17185 the branch slot. */
17186
17187 static bool
17188 is_branch_slot_insn (rtx_insn *insn)
17189 {
17190 if (!insn || !NONDEBUG_INSN_P (insn)
17191 || GET_CODE (PATTERN (insn)) == USE
17192 || GET_CODE (PATTERN (insn)) == CLOBBER)
17193 return false;
17194
17195 if (rs6000_sched_groups)
17196 {
17197 enum attr_type type = get_attr_type (insn);
17198 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17199 return true;
17200 return false;
17201 }
17202
17203 return false;
17204 }
17205
17206 /* Return true if OUT_INSN sets a value that is used in the
17207 address generation computation of IN_INSN. */
17208 static bool
17209 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17210 {
17211 rtx out_set, in_set;
17212
17213 /* For performance reasons, only handle the simple case where
17214 both loads are a single_set. */
17215 out_set = single_set (out_insn);
17216 if (out_set)
17217 {
17218 in_set = single_set (in_insn);
17219 if (in_set)
17220 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17221 }
17222
17223 return false;
17224 }
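/* An example of the situation this detects, as hypothetical RTL:

     out_insn: (set (reg 9) (plus (reg 10) (const_int 8)))
     in_insn:  (set (reg 3) (mem (reg 9)))

   Register 9 feeds the load's address generation, so the function
   returns true.  Since the test is reg_mentioned_p on the whole
   SET_SRC, for a load any use inside the address matches.  */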
17225
17226 /* Try to determine base/offset/size parts of the given MEM.
17227 Return true if successful, false if the values cannot
17228 all be determined.
17229
17230 This function only looks for REG or REG+CONST address forms.
17231 REG+REG address form will return false. */
17232
17233 static bool
17234 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17235 HOST_WIDE_INT *size)
17236 {
17237 rtx addr_rtx;
17238 if (MEM_SIZE_KNOWN_P (mem))
17239 *size = MEM_SIZE (mem);
17240 else
17241 return false;
17242
17243 addr_rtx = (XEXP (mem, 0));
17244 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17245 addr_rtx = XEXP (addr_rtx, 1);
17246
17247 *offset = 0;
17248 while (GET_CODE (addr_rtx) == PLUS
17249 && CONST_INT_P (XEXP (addr_rtx, 1)))
17250 {
17251 *offset += INTVAL (XEXP (addr_rtx, 1));
17252 addr_rtx = XEXP (addr_rtx, 0);
17253 }
17254 if (!REG_P (addr_rtx))
17255 return false;
17256
17257 *base = addr_rtx;
17258 return true;
17259 }
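/* A worked example on a hypothetical MEM: for a 4-byte MEM whose
   address is (plus (plus (reg 9) (const_int 32)) (const_int 8)),
   the loop above folds the constants, giving *base = (reg 9),
   *offset = 40 and *size = 4.  A REG+REG address returns false.  */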
17260
17261 /* Return true if the target storage location of MEM1 is adjacent
17262 to the target storage location of MEM2. */
17264
17265 static bool
17266 adjacent_mem_locations (rtx mem1, rtx mem2)
17267 {
17268 rtx reg1, reg2;
17269 HOST_WIDE_INT off1, size1, off2, size2;
17270
17271 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17272 && get_memref_parts (mem2, &reg2, &off2, &size2))
17273 return ((REGNO (reg1) == REGNO (reg2))
17274 && ((off1 + size1 == off2)
17275 || (off2 + size2 == off1)));
17276
17277 return false;
17278 }
17279
17280 /* This function returns true if it can be determined that the two MEM
17281 locations overlap by at least 1 byte based on base reg/offset/size. */
17282
17283 static bool
17284 mem_locations_overlap (rtx mem1, rtx mem2)
17285 {
17286 rtx reg1, reg2;
17287 HOST_WIDE_INT off1, size1, off2, size2;
17288
17289 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17290 && get_memref_parts (mem2, &reg2, &off2, &size2))
17291 return ((REGNO (reg1) == REGNO (reg2))
17292 && (((off1 <= off2) && (off1 + size1 > off2))
17293 || ((off2 <= off1) && (off2 + size2 > off1))));
17294
17295 return false;
17296 }
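/* A numeric sketch for the two predicates above, with hypothetical
   offsets from a common base register: a 4-byte access at offset 0
   and a 4-byte access at offset 4 are adjacent (0 + 4 == 4), while
   a 4-byte access at offset 0 and an 8-byte access at offset 2
   overlap (0 <= 2 && 0 + 4 > 2).  */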
17297
17298 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
17299 Increase the priority to execute INSN earlier, reduce the priority
17300 to execute INSN later. Do not define this hook if you do not need
17301 to adjust the scheduling priorities of insns. */
17303
17304 static int
17305 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
17306 {
17307 rtx load_mem, str_mem;
17308 /* On machines (like the 750) which have asymmetric integer units,
17309 where one integer unit can do multiply and divides and the other
17310 can't, reduce the priority of multiply/divide so it is scheduled
17311 before other integer operations. */
17312
17313 #if 0
17314 if (! INSN_P (insn))
17315 return priority;
17316
17317 if (GET_CODE (PATTERN (insn)) == USE)
17318 return priority;
17319
17320 switch (rs6000_tune) {
17321 case PROCESSOR_PPC750:
17322 switch (get_attr_type (insn))
17323 {
17324 default:
17325 break;
17326
17327 case TYPE_MUL:
17328 case TYPE_DIV:
17329 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
17330 priority, priority);
17331 if (priority >= 0 && priority < 0x01000000)
17332 priority >>= 3;
17333 break;
17334 }
17335 }
17336 #endif
17337
17338 if (insn_must_be_first_in_group (insn)
17339 && reload_completed
17340 && current_sched_info->sched_max_insns_priority
17341 && rs6000_sched_restricted_insns_priority)
17342 {
17343
17344 /* Prioritize insns that can be dispatched only in the first
17345 dispatch slot. */
17346 if (rs6000_sched_restricted_insns_priority == 1)
17347 /* Attach highest priority to insn. This means that in
17348 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17349 precede 'priority' (critical path) considerations. */
17350 return current_sched_info->sched_max_insns_priority;
17351 else if (rs6000_sched_restricted_insns_priority == 2)
17352 /* Increase priority of insn by a minimal amount. This means that in
17353 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17354 considerations precede dispatch-slot restriction considerations. */
17355 return (priority + 1);
17356 }
17357
17358 if (rs6000_tune == PROCESSOR_POWER6
17359 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
17360 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
17361 /* Attach highest priority to insn if the scheduler has just issued two
17362 stores and this instruction is a load, or two loads and this instruction
17363 is a store. Power6 wants loads and stores scheduled alternately
17364 when possible */
17365 return current_sched_info->sched_max_insns_priority;
17366
17367 return priority;
17368 }
17369
17370 /* Return true if the instruction is nonpipelined on the Cell. */
17371 static bool
17372 is_nonpipeline_insn (rtx_insn *insn)
17373 {
17374 enum attr_type type;
17375 if (!insn || !NONDEBUG_INSN_P (insn)
17376 || GET_CODE (PATTERN (insn)) == USE
17377 || GET_CODE (PATTERN (insn)) == CLOBBER)
17378 return false;
17379
17380 type = get_attr_type (insn);
17381 if (type == TYPE_MUL
17382 || type == TYPE_DIV
17383 || type == TYPE_SDIV
17384 || type == TYPE_DDIV
17385 || type == TYPE_SSQRT
17386 || type == TYPE_DSQRT
17387 || type == TYPE_MFCR
17388 || type == TYPE_MFCRF
17389 || type == TYPE_MFJMPR)
17390 {
17391 return true;
17392 }
17393 return false;
17394 }
17395
17396
17397 /* Return how many instructions the machine can issue per cycle. */
17398
17399 static int
17400 rs6000_issue_rate (void)
17401 {
17402 /* Unless scheduling for register pressure, use an issue rate of 1 for
17403 the first scheduling pass to reduce degradation. */
17404 if (!reload_completed && !flag_sched_pressure)
17405 return 1;
17406
17407 switch (rs6000_tune) {
17408 case PROCESSOR_RS64A:
17409 case PROCESSOR_PPC601: /* ? */
17410 case PROCESSOR_PPC7450:
17411 return 3;
17412 case PROCESSOR_PPC440:
17413 case PROCESSOR_PPC603:
17414 case PROCESSOR_PPC750:
17415 case PROCESSOR_PPC7400:
17416 case PROCESSOR_PPC8540:
17417 case PROCESSOR_PPC8548:
17418 case PROCESSOR_CELL:
17419 case PROCESSOR_PPCE300C2:
17420 case PROCESSOR_PPCE300C3:
17421 case PROCESSOR_PPCE500MC:
17422 case PROCESSOR_PPCE500MC64:
17423 case PROCESSOR_PPCE5500:
17424 case PROCESSOR_PPCE6500:
17425 case PROCESSOR_TITAN:
17426 return 2;
17427 case PROCESSOR_PPC476:
17428 case PROCESSOR_PPC604:
17429 case PROCESSOR_PPC604e:
17430 case PROCESSOR_PPC620:
17431 case PROCESSOR_PPC630:
17432 return 4;
17433 case PROCESSOR_POWER4:
17434 case PROCESSOR_POWER5:
17435 case PROCESSOR_POWER6:
17436 case PROCESSOR_POWER7:
17437 return 5;
17438 case PROCESSOR_POWER8:
17439 return 7;
17440 case PROCESSOR_POWER9:
17441 case PROCESSOR_FUTURE:
17442 return 6;
17443 default:
17444 return 1;
17445 }
17446 }
17447
17448 /* Return how many instructions to look ahead for better insn
17449 scheduling. */
17450
17451 static int
17452 rs6000_use_sched_lookahead (void)
17453 {
17454 switch (rs6000_tune)
17455 {
17456 case PROCESSOR_PPC8540:
17457 case PROCESSOR_PPC8548:
17458 return 4;
17459
17460 case PROCESSOR_CELL:
17461 return (reload_completed ? 8 : 0);
17462
17463 default:
17464 return 0;
17465 }
17466 }
17467
17468 /* We are choosing an insn from the ready queue. Return zero if INSN
17469 can be chosen. */
17470 static int
17471 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17472 {
17473 if (ready_index == 0)
17474 return 0;
17475
17476 if (rs6000_tune != PROCESSOR_CELL)
17477 return 0;
17478
17479 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17480
17481 if (!reload_completed
17482 || is_nonpipeline_insn (insn)
17483 || is_microcoded_insn (insn))
17484 return 1;
17485
17486 return 0;
17487 }
17488
17489 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17490 and return true. */
17491
17492 static bool
17493 find_mem_ref (rtx pat, rtx *mem_ref)
17494 {
17495 const char * fmt;
17496 int i, j;
17497
17498 /* stack_tie does not produce any real memory traffic. */
17499 if (tie_operand (pat, VOIDmode))
17500 return false;
17501
17502 if (MEM_P (pat))
17503 {
17504 *mem_ref = pat;
17505 return true;
17506 }
17507
17508 /* Recursively process the pattern. */
17509 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17510
17511 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17512 {
17513 if (fmt[i] == 'e')
17514 {
17515 if (find_mem_ref (XEXP (pat, i), mem_ref))
17516 return true;
17517 }
17518 else if (fmt[i] == 'E')
17519 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17520 {
17521 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17522 return true;
17523 }
17524 }
17525
17526 return false;
17527 }
17528
17529 /* Determine if PAT is a PATTERN of a load insn. */
17530
17531 static bool
17532 is_load_insn1 (rtx pat, rtx *load_mem)
17533 {
17534 if (!pat)
17535 return false;
17536
17537 if (GET_CODE (pat) == SET)
17538 return find_mem_ref (SET_SRC (pat), load_mem);
17539
17540 if (GET_CODE (pat) == PARALLEL)
17541 {
17542 int i;
17543
17544 for (i = 0; i < XVECLEN (pat, 0); i++)
17545 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17546 return true;
17547 }
17548
17549 return false;
17550 }
17551
17552 /* Determine if INSN loads from memory. */
17553
17554 static bool
17555 is_load_insn (rtx insn, rtx *load_mem)
17556 {
17557 if (!insn || !INSN_P (insn))
17558 return false;
17559
17560 if (CALL_P (insn))
17561 return false;
17562
17563 return is_load_insn1 (PATTERN (insn), load_mem);
17564 }
17565
17566 /* Determine if PAT is a PATTERN of a store insn. */
17567
17568 static bool
17569 is_store_insn1 (rtx pat, rtx *str_mem)
17570 {
17571 if (!pat)
17572 return false;
17573
17574 if (GET_CODE (pat) == SET)
17575 return find_mem_ref (SET_DEST (pat), str_mem);
17576
17577 if (GET_CODE (pat) == PARALLEL)
17578 {
17579 int i;
17580
17581 for (i = 0; i < XVECLEN (pat, 0); i++)
17582 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17583 return true;
17584 }
17585
17586 return false;
17587 }
17588
17589 /* Determine if INSN stores to memory. */
17590
17591 static bool
17592 is_store_insn (rtx insn, rtx *str_mem)
17593 {
17594 if (!insn || !INSN_P (insn))
17595 return false;
17596
17597 return is_store_insn1 (PATTERN (insn), str_mem);
17598 }
17599
17600 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17601
17602 static bool
17603 is_power9_pairable_vec_type (enum attr_type type)
17604 {
17605 switch (type)
17606 {
17607 case TYPE_VECSIMPLE:
17608 case TYPE_VECCOMPLEX:
17609 case TYPE_VECDIV:
17610 case TYPE_VECCMP:
17611 case TYPE_VECPERM:
17612 case TYPE_VECFLOAT:
17613 case TYPE_VECFDIV:
17614 case TYPE_VECDOUBLE:
17615 return true;
17616 default:
17617 break;
17618 }
17619 return false;
17620 }
17621
17622 /* Returns whether the dependence between INSN and NEXT is considered
17623 costly by the given target. */
17624
17625 static bool
17626 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
17627 {
17628 rtx insn;
17629 rtx next;
17630 rtx load_mem, str_mem;
17631
17632 /* If the flag is not enabled, no dependence is considered costly;
17633 allow all dependent insns in the same group.
17634 This is the most aggressive option. */
17635 if (rs6000_sched_costly_dep == no_dep_costly)
17636 return false;
17637
17638 /* If the flag is set to 1, a dependence is always considered costly;
17639 do not allow dependent instructions in the same group.
17640 This is the most conservative option. */
17641 if (rs6000_sched_costly_dep == all_deps_costly)
17642 return true;
17643
17644 insn = DEP_PRO (dep);
17645 next = DEP_CON (dep);
17646
17647 if (rs6000_sched_costly_dep == store_to_load_dep_costly
17648 && is_load_insn (next, &load_mem)
17649 && is_store_insn (insn, &str_mem))
17650 /* Prevent load after store in the same group. */
17651 return true;
17652
17653 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
17654 && is_load_insn (next, &load_mem)
17655 && is_store_insn (insn, &str_mem)
17656 && DEP_TYPE (dep) == REG_DEP_TRUE
17657 && mem_locations_overlap(str_mem, load_mem))
17658 /* Prevent load after store in the same group if it is a true
17659 dependence. */
17660 return true;
17661
17662 /* The flag is set to X; dependences with latency >= X are considered costly,
17663 and will not be scheduled in the same group. */
17664 if (rs6000_sched_costly_dep <= max_dep_latency
17665 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
17666 return true;
17667
17668 return false;
17669 }
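/* A numeric sketch of the latency test above, with a hypothetical
   setting: for -msched-costly-dep=4, a dependence of cost 6 between
   insns at distance 1 gives 6 - 1 = 5 >= 4, so the two insns are
   kept in separate dispatch groups.  */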
17670
17671 /* Return the next insn after INSN that is found before TAIL is reached,
17672 skipping any "non-active" insns - insns that will not actually occupy
17673 an issue slot. Return NULL_RTX if such an insn is not found. */
17674
17675 static rtx_insn *
17676 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17677 {
17678 if (insn == NULL_RTX || insn == tail)
17679 return NULL;
17680
17681 while (1)
17682 {
17683 insn = NEXT_INSN (insn);
17684 if (insn == NULL_RTX || insn == tail)
17685 return NULL;
17686
17687 if (CALL_P (insn)
17688 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17689 || (NONJUMP_INSN_P (insn)
17690 && GET_CODE (PATTERN (insn)) != USE
17691 && GET_CODE (PATTERN (insn)) != CLOBBER
17692 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17693 break;
17694 }
17695 return insn;
17696 }
17697
17698 /* Move instruction at POS to the end of the READY list. */
17699
17700 static void
17701 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
17702 {
17703 rtx_insn *tmp;
17704 int i;
17705
17706 tmp = ready[pos];
17707 for (i = pos; i < lastpos; i++)
17708 ready[i] = ready[i + 1];
17709 ready[lastpos] = tmp;
17710 }
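/* Illustrative trace on a hypothetical ready list (the entry at
   LASTPOS is considered first for issue): with ready = {a, b, c, d},
   pos == 1 and lastpos == 3, entries c and d shift down and the
   result is {a, c, d, b}.  */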
17711
17712 /* Do Power6 specific sched_reorder2 reordering of ready list. */
17713
17714 static int
17715 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
17716 {
17717 /* For Power6, we need to handle some special cases to try and keep the
17718 store queue from overflowing and triggering expensive flushes.
17719
17720 This code monitors how load and store instructions are being issued
17721 and skews the ready list one way or the other to increase the likelihood
17722 that a desired instruction is issued at the proper time.
17723
17724 A couple of things are done. First, we maintain a "load_store_pendulum"
17725 to track the current state of load/store issue.
17726
17727 - If the pendulum is at zero, then no loads or stores have been
17728 issued in the current cycle so we do nothing.
17729
17730 - If the pendulum is 1, then a single load has been issued in this
17731 cycle and we attempt to locate another load in the ready list to
17732 issue with it.
17733
17734 - If the pendulum is -2, then two stores have already been
17735 issued in this cycle, so we increase the priority of the first load
17736 in the ready list to increase its likelihood of being chosen first
17737 in the next cycle.
17738
17739 - If the pendulum is -1, then a single store has been issued in this
17740 cycle and we attempt to locate another store in the ready list to
17741 issue with it, preferring a store to an adjacent memory location to
17742 facilitate store pairing in the store queue.
17743
17744 - If the pendulum is 2, then two loads have already been
17745 issued in this cycle, so we increase the priority of the first store
17746 in the ready list to increase its likelihood of being chosen first
17747 in the next cycle.
17748
17749 - If the pendulum < -2 or > 2, then do nothing.
17750
17751 Note: This code covers the most common scenarios. There exist
17752 non-load/store instructions which make use of the LSU and which
17753 would need to be accounted for to strictly model the behavior
17754 of the machine. Those instructions are currently unaccounted
17755 for to help minimize compile time overhead of this code.
17756 */
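/* A compact summary of the pendulum handling implemented below:

     issued this cycle   pendulum   action on the ready list
     one store           -1         look for another store (prefer adjacent)
     two stores          -2         boost the first load's priority
     one load            +1         look for another load
     two loads           +2         boost the first store's priority

   Values beyond +/-2 leave the list untouched.  */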
17757 int pos;
17758 rtx load_mem, str_mem;
17759
17760 if (is_store_insn (last_scheduled_insn, &str_mem))
17761 /* Issuing a store, swing the load_store_pendulum to the left */
17762 load_store_pendulum--;
17763 else if (is_load_insn (last_scheduled_insn, &load_mem))
17764 /* Issuing a load, swing the load_store_pendulum to the right */
17765 load_store_pendulum++;
17766 else
17767 return cached_can_issue_more;
17768
17769 /* If the pendulum is balanced, or there is only one instruction on
17770 the ready list, then all is well, so return. */
17771 if ((load_store_pendulum == 0) || (lastpos <= 0))
17772 return cached_can_issue_more;
17773
17774 if (load_store_pendulum == 1)
17775 {
17776 /* A load has been issued in this cycle. Scan the ready list
17777 for another load to issue with it */
17778 pos = lastpos;
17779
17780 while (pos >= 0)
17781 {
17782 if (is_load_insn (ready[pos], &load_mem))
17783 {
17784 /* Found a load. Move it to the head of the ready list,
17785 and adjust its priority so that it is more likely to
17786 stay there */
17787 move_to_end_of_ready (ready, pos, lastpos);
17788
17789 if (!sel_sched_p ()
17790 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17791 INSN_PRIORITY (ready[lastpos])++;
17792 break;
17793 }
17794 pos--;
17795 }
17796 }
17797 else if (load_store_pendulum == -2)
17798 {
17799 /* Two stores have been issued in this cycle. Increase the
17800 priority of the first load in the ready list to favor it for
17801 issuing in the next cycle. */
17802 pos = lastpos;
17803
17804 while (pos >= 0)
17805 {
17806 if (is_load_insn (ready[pos], &load_mem)
17807 && !sel_sched_p ()
17808 && INSN_PRIORITY_KNOWN (ready[pos]))
17809 {
17810 INSN_PRIORITY (ready[pos])++;
17811
17812 /* Adjust the pendulum to account for the fact that a load
17813 was found and increased in priority. This is to prevent
17814 increasing the priority of multiple loads */
17815 load_store_pendulum--;
17816
17817 break;
17818 }
17819 pos--;
17820 }
17821 }
17822 else if (load_store_pendulum == -1)
17823 {
17824 /* A store has been issued in this cycle. Scan the ready list for
17825 another store to issue with it, preferring a store to an adjacent
17826 memory location */
17827 int first_store_pos = -1;
17828
17829 pos = lastpos;
17830
17831 while (pos >= 0)
17832 {
17833 if (is_store_insn (ready[pos], &str_mem))
17834 {
17835 rtx str_mem2;
17836 /* Maintain the index of the first store found on the
17837 list */
17838 if (first_store_pos == -1)
17839 first_store_pos = pos;
17840
17841 if (is_store_insn (last_scheduled_insn, &str_mem2)
17842 && adjacent_mem_locations (str_mem, str_mem2))
17843 {
17844 /* Found an adjacent store. Move it to the head of the
17845 ready list, and adjust its priority so that it is
17846 more likely to stay there */
17847 move_to_end_of_ready (ready, pos, lastpos);
17848
17849 if (!sel_sched_p ()
17850 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17851 INSN_PRIORITY (ready[lastpos])++;
17852
17853 first_store_pos = -1;
17854
17855 break;
17856 }
17857 }
17858 pos--;
17859 }
17860
17861 if (first_store_pos >= 0)
17862 {
17863 /* An adjacent store wasn't found, but a non-adjacent store was,
17864 so move the non-adjacent store to the front of the ready
17865 list, and adjust its priority so that it is more likely to
17866 stay there. */
17867 move_to_end_of_ready (ready, first_store_pos, lastpos);
17868 if (!sel_sched_p ()
17869 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17870 INSN_PRIORITY (ready[lastpos])++;
17871 }
17872 }
17873 else if (load_store_pendulum == 2)
17874 {
17875 /* Two loads have been issued in this cycle. Increase the priority
17876 of the first store in the ready list to favor it for issuing in
17877 the next cycle. */
17878 pos = lastpos;
17879
17880 while (pos >= 0)
17881 {
17882 if (is_store_insn (ready[pos], &str_mem)
17883 && !sel_sched_p ()
17884 && INSN_PRIORITY_KNOWN (ready[pos]))
17885 {
17886 INSN_PRIORITY (ready[pos])++;
17887
17888 /* Adjust the pendulum to account for the fact that a store
17889 was found and increased in priority. This is to prevent
17890 increasing the priority of multiple stores */
17891 load_store_pendulum++;
17892
17893 break;
17894 }
17895 pos--;
17896 }
17897 }
17898
17899 return cached_can_issue_more;
17900 }
17901
17902 /* Do Power9 specific sched_reorder2 reordering of ready list. */
17903
17904 static int
17905 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
17906 {
17907 int pos;
17908 enum attr_type type, type2;
17909
17910 type = get_attr_type (last_scheduled_insn);
17911
17912 /* Try to issue fixed point divides back-to-back in pairs so they will be
17913 routed to separate execution units and execute in parallel. */
17914 if (type == TYPE_DIV && divide_cnt == 0)
17915 {
17916 /* First divide has been scheduled. */
17917 divide_cnt = 1;
17918
17919 /* Scan the ready list looking for another divide; if found, move it
17920 to the end of the list so it is chosen next. */
17921 pos = lastpos;
17922 while (pos >= 0)
17923 {
17924 if (recog_memoized (ready[pos]) >= 0
17925 && get_attr_type (ready[pos]) == TYPE_DIV)
17926 {
17927 move_to_end_of_ready (ready, pos, lastpos);
17928 break;
17929 }
17930 pos--;
17931 }
17932 }
17933 else
17934 {
17935 /* Last insn was the 2nd divide or not a divide, reset the counter. */
17936 divide_cnt = 0;
17937
17938 /* The best dispatch throughput for vector and vector load insns can be
17939 achieved by interleaving a vector and vector load such that they'll
17940 dispatch to the same superslice. If this pairing cannot be achieved
17941 then it is best to pair vector insns together and vector load insns
17942 together.
17943
17944 To aid in this pairing, vec_pairing maintains the current state with
17945 the following values:
17946
17947 0 : Initial state, no vecload/vector pairing has been started.
17948
17949 1 : A vecload or vector insn has been issued and a candidate for
17950 pairing has been found and moved to the end of the ready
17951 list. */
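 /* Illustrative walk-through (inferred from the code below, not a
 statement about the hardware dispatcher): after a vecload issues
 with vec_pairing == 0, a pairable vector insn found on the ready
 list is moved to the end so it issues next and vec_pairing becomes
 1; once that partner has issued, the state is reset to 0 at the
 bottom of this function. */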
17952 if (type == TYPE_VECLOAD)
17953 {
17954 /* Issued a vecload. */
17955 if (vec_pairing == 0)
17956 {
17957 int vecload_pos = -1;
17958 /* We issued a single vecload, look for a vector insn to pair it
17959 with. If one isn't found, try to pair another vecload. */
17960 pos = lastpos;
17961 while (pos >= 0)
17962 {
17963 if (recog_memoized (ready[pos]) >= 0)
17964 {
17965 type2 = get_attr_type (ready[pos]);
17966 if (is_power9_pairable_vec_type (type2))
17967 {
17968 /* Found a vector insn to pair with, move it to the
17969 end of the ready list so it is scheduled next. */
17970 move_to_end_of_ready (ready, pos, lastpos);
17971 vec_pairing = 1;
17972 return cached_can_issue_more;
17973 }
17974 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
17975 /* Remember position of first vecload seen. */
17976 vecload_pos = pos;
17977 }
17978 pos--;
17979 }
17980 if (vecload_pos >= 0)
17981 {
17982 /* Didn't find a vector to pair with but did find a vecload,
17983 move it to the end of the ready list. */
17984 move_to_end_of_ready (ready, vecload_pos, lastpos);
17985 vec_pairing = 1;
17986 return cached_can_issue_more;
17987 }
17988 }
17989 }
17990 else if (is_power9_pairable_vec_type (type))
17991 {
17992 /* Issued a vector operation. */
17993 if (vec_pairing == 0)
17994 {
17995 int vec_pos = -1;
17996 /* We issued a single vector insn, look for a vecload to pair it
17997 with. If one isn't found, try to pair another vector. */
17998 pos = lastpos;
17999 while (pos >= 0)
18000 {
18001 if (recog_memoized (ready[pos]) >= 0)
18002 {
18003 type2 = get_attr_type (ready[pos]);
18004 if (type2 == TYPE_VECLOAD)
18005 {
18006 /* Found a vecload insn to pair with, move it to the
18007 end of the ready list so it is scheduled next. */
18008 move_to_end_of_ready (ready, pos, lastpos);
18009 vec_pairing = 1;
18010 return cached_can_issue_more;
18011 }
18012 else if (is_power9_pairable_vec_type (type2)
18013 && vec_pos == -1)
18014 /* Remember position of first vector insn seen. */
18015 vec_pos = pos;
18016 }
18017 pos--;
18018 }
18019 if (vec_pos >= 0)
18020 {
18021 /* Didn't find a vecload to pair with but did find a vector
18022 insn, move it to the end of the ready list. */
18023 move_to_end_of_ready (ready, vec_pos, lastpos);
18024 vec_pairing = 1;
18025 return cached_can_issue_more;
18026 }
18027 }
18028 }
18029
18030 /* We've either finished a vec/vecload pair, couldn't find an insn to
18031 continue the current pair, or the last insn had nothing to do
18032 with pairing. In any case, reset the state. */
18033 vec_pairing = 0;
18034 }
18035
18036 return cached_can_issue_more;
18037 }
18038
18039 /* We are about to begin issuing insns for this clock cycle. */
18040
18041 static int
18042 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18043 rtx_insn **ready ATTRIBUTE_UNUSED,
18044 int *pn_ready ATTRIBUTE_UNUSED,
18045 int clock_var ATTRIBUTE_UNUSED)
18046 {
18047 int n_ready = *pn_ready;
18048
18049 if (sched_verbose)
18050 fprintf (dump, "// rs6000_sched_reorder :\n");
18051
18052 /* Reorder the ready list if the next insn to issue (the last
18053 ready insn) is a non-pipelined insn. */
18054 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18055 {
18056 if (is_nonpipeline_insn (ready[n_ready - 1])
18057 && (recog_memoized (ready[n_ready - 2]) > 0))
18058 /* Simply swap first two insns. */
18059 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18060 }
18061
18062 if (rs6000_tune == PROCESSOR_POWER6)
18063 load_store_pendulum = 0;
18064
18065 return rs6000_issue_rate ();
18066 }
18067
18068 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18069
18070 static int
18071 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18072 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18073 {
18074 if (sched_verbose)
18075 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18076
18077 /* Do Power6 dependent reordering if necessary. */
18078 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18079 return power6_sched_reorder2 (ready, *pn_ready - 1);
18080
18081 /* Do Power9 dependent reordering if necessary. */
18082 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18083 && recog_memoized (last_scheduled_insn) >= 0)
18084 return power9_sched_reorder2 (ready, *pn_ready - 1);
18085
18086 return cached_can_issue_more;
18087 }
18088
18089 /* Return whether the presence of INSN causes a dispatch group termination
18090 of group WHICH_GROUP.
18091
18092 If WHICH_GROUP == current_group, this function will return true if INSN
18093 causes the termination of the current group (i.e, the dispatch group to
18094 which INSN belongs). This means that INSN will be the last insn in the
18095 group it belongs to.
18096
18097 If WHICH_GROUP == previous_group, this function will return true if INSN
18098 causes the termination of the previous group (i.e, the dispatch group that
18099 precedes the group to which INSN belongs). This means that INSN will be
18100 the first insn in the group it belongs to. */
18101
18102 static bool
18103 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18104 {
18105 bool first, last;
18106
18107 if (! insn)
18108 return false;
18109
18110 first = insn_must_be_first_in_group (insn);
18111 last = insn_must_be_last_in_group (insn);
18112
18113 if (first && last)
18114 return true;
18115
18116 if (which_group == current_group)
18117 return last;
18118 else if (which_group == previous_group)
18119 return first;
18120
18121 return false;
18122 }
18123
18124
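/* Return true if INSN must be the first insn in a dispatch group for
 the current tuning target. */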
18125 static bool
18126 insn_must_be_first_in_group (rtx_insn *insn)
18127 {
18128 enum attr_type type;
18129
18130 if (!insn
18131 || NOTE_P (insn)
18132 || DEBUG_INSN_P (insn)
18133 || GET_CODE (PATTERN (insn)) == USE
18134 || GET_CODE (PATTERN (insn)) == CLOBBER)
18135 return false;
18136
18137 switch (rs6000_tune)
18138 {
18139 case PROCESSOR_POWER5:
18140 if (is_cracked_insn (insn))
18141 return true;
18142 /* FALLTHRU */
18143 case PROCESSOR_POWER4:
18144 if (is_microcoded_insn (insn))
18145 return true;
18146
18147 if (!rs6000_sched_groups)
18148 return false;
18149
18150 type = get_attr_type (insn);
18151
18152 switch (type)
18153 {
18154 case TYPE_MFCR:
18155 case TYPE_MFCRF:
18156 case TYPE_MTCR:
18157 case TYPE_CR_LOGICAL:
18158 case TYPE_MTJMPR:
18159 case TYPE_MFJMPR:
18160 case TYPE_DIV:
18161 case TYPE_LOAD_L:
18162 case TYPE_STORE_C:
18163 case TYPE_ISYNC:
18164 case TYPE_SYNC:
18165 return true;
18166 default:
18167 break;
18168 }
18169 break;
18170 case PROCESSOR_POWER6:
18171 type = get_attr_type (insn);
18172
18173 switch (type)
18174 {
18175 case TYPE_EXTS:
18176 case TYPE_CNTLZ:
18177 case TYPE_TRAP:
18178 case TYPE_MUL:
18179 case TYPE_INSERT:
18180 case TYPE_FPCOMPARE:
18181 case TYPE_MFCR:
18182 case TYPE_MTCR:
18183 case TYPE_MFJMPR:
18184 case TYPE_MTJMPR:
18185 case TYPE_ISYNC:
18186 case TYPE_SYNC:
18187 case TYPE_LOAD_L:
18188 case TYPE_STORE_C:
18189 return true;
18190 case TYPE_SHIFT:
18191 if (get_attr_dot (insn) == DOT_NO
18192 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18193 return true;
18194 else
18195 break;
18196 case TYPE_DIV:
18197 if (get_attr_size (insn) == SIZE_32)
18198 return true;
18199 else
18200 break;
18201 case TYPE_LOAD:
18202 case TYPE_STORE:
18203 case TYPE_FPLOAD:
18204 case TYPE_FPSTORE:
18205 if (get_attr_update (insn) == UPDATE_YES)
18206 return true;
18207 else
18208 break;
18209 default:
18210 break;
18211 }
18212 break;
18213 case PROCESSOR_POWER7:
18214 type = get_attr_type (insn);
18215
18216 switch (type)
18217 {
18218 case TYPE_CR_LOGICAL:
18219 case TYPE_MFCR:
18220 case TYPE_MFCRF:
18221 case TYPE_MTCR:
18222 case TYPE_DIV:
18223 case TYPE_ISYNC:
18224 case TYPE_LOAD_L:
18225 case TYPE_STORE_C:
18226 case TYPE_MFJMPR:
18227 case TYPE_MTJMPR:
18228 return true;
18229 case TYPE_MUL:
18230 case TYPE_SHIFT:
18231 case TYPE_EXTS:
18232 if (get_attr_dot (insn) == DOT_YES)
18233 return true;
18234 else
18235 break;
18236 case TYPE_LOAD:
18237 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18238 || get_attr_update (insn) == UPDATE_YES)
18239 return true;
18240 else
18241 break;
18242 case TYPE_STORE:
18243 case TYPE_FPLOAD:
18244 case TYPE_FPSTORE:
18245 if (get_attr_update (insn) == UPDATE_YES)
18246 return true;
18247 else
18248 break;
18249 default:
18250 break;
18251 }
18252 break;
18253 case PROCESSOR_POWER8:
18254 type = get_attr_type (insn);
18255
18256 switch (type)
18257 {
18258 case TYPE_CR_LOGICAL:
18259 case TYPE_MFCR:
18260 case TYPE_MFCRF:
18261 case TYPE_MTCR:
18262 case TYPE_SYNC:
18263 case TYPE_ISYNC:
18264 case TYPE_LOAD_L:
18265 case TYPE_STORE_C:
18266 case TYPE_VECSTORE:
18267 case TYPE_MFJMPR:
18268 case TYPE_MTJMPR:
18269 return true;
18270 case TYPE_SHIFT:
18271 case TYPE_EXTS:
18272 case TYPE_MUL:
18273 if (get_attr_dot (insn) == DOT_YES)
18274 return true;
18275 else
18276 break;
18277 case TYPE_LOAD:
18278 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18279 || get_attr_update (insn) == UPDATE_YES)
18280 return true;
18281 else
18282 break;
18283 case TYPE_STORE:
18284 if (get_attr_update (insn) == UPDATE_YES
18285 && get_attr_indexed (insn) == INDEXED_YES)
18286 return true;
18287 else
18288 break;
18289 default:
18290 break;
18291 }
18292 break;
18293 default:
18294 break;
18295 }
18296
18297 return false;
18298 }
18299
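/* Return true if INSN must be the last insn in a dispatch group for
 the current tuning target. */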
18300 static bool
18301 insn_must_be_last_in_group (rtx_insn *insn)
18302 {
18303 enum attr_type type;
18304
18305 if (!insn
18306 || NOTE_P (insn)
18307 || DEBUG_INSN_P (insn)
18308 || GET_CODE (PATTERN (insn)) == USE
18309 || GET_CODE (PATTERN (insn)) == CLOBBER)
18310 return false;
18311
18312 switch (rs6000_tune) {
18313 case PROCESSOR_POWER4:
18314 case PROCESSOR_POWER5:
18315 if (is_microcoded_insn (insn))
18316 return true;
18317
18318 if (is_branch_slot_insn (insn))
18319 return true;
18320
18321 break;
18322 case PROCESSOR_POWER6:
18323 type = get_attr_type (insn);
18324
18325 switch (type)
18326 {
18327 case TYPE_EXTS:
18328 case TYPE_CNTLZ:
18329 case TYPE_TRAP:
18330 case TYPE_MUL:
18331 case TYPE_FPCOMPARE:
18332 case TYPE_MFCR:
18333 case TYPE_MTCR:
18334 case TYPE_MFJMPR:
18335 case TYPE_MTJMPR:
18336 case TYPE_ISYNC:
18337 case TYPE_SYNC:
18338 case TYPE_LOAD_L:
18339 case TYPE_STORE_C:
18340 return true;
18341 case TYPE_SHIFT:
18342 if (get_attr_dot (insn) == DOT_NO
18343 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18344 return true;
18345 else
18346 break;
18347 case TYPE_DIV:
18348 if (get_attr_size (insn) == SIZE_32)
18349 return true;
18350 else
18351 break;
18352 default:
18353 break;
18354 }
18355 break;
18356 case PROCESSOR_POWER7:
18357 type = get_attr_type (insn);
18358
18359 switch (type)
18360 {
18361 case TYPE_ISYNC:
18362 case TYPE_SYNC:
18363 case TYPE_LOAD_L:
18364 case TYPE_STORE_C:
18365 return true;
18366 case TYPE_LOAD:
18367 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18368 && get_attr_update (insn) == UPDATE_YES)
18369 return true;
18370 else
18371 break;
18372 case TYPE_STORE:
18373 if (get_attr_update (insn) == UPDATE_YES
18374 && get_attr_indexed (insn) == INDEXED_YES)
18375 return true;
18376 else
18377 break;
18378 default:
18379 break;
18380 }
18381 break;
18382 case PROCESSOR_POWER8:
18383 type = get_attr_type (insn);
18384
18385 switch (type)
18386 {
18387 case TYPE_MFCR:
18388 case TYPE_MTCR:
18389 case TYPE_ISYNC:
18390 case TYPE_SYNC:
18391 case TYPE_LOAD_L:
18392 case TYPE_STORE_C:
18393 return true;
18394 case TYPE_LOAD:
18395 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18396 && get_attr_update (insn) == UPDATE_YES)
18397 return true;
18398 else
18399 break;
18400 case TYPE_STORE:
18401 if (get_attr_update (insn) == UPDATE_YES
18402 && get_attr_indexed (insn) == INDEXED_YES)
18403 return true;
18404 else
18405 break;
18406 default:
18407 break;
18408 }
18409 break;
18410 default:
18411 break;
18412 }
18413
18414 return false;
18415 }
18416
18417 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18418 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18419
18420 static bool
18421 is_costly_group (rtx *group_insns, rtx next_insn)
18422 {
18423 int i;
18424 int issue_rate = rs6000_issue_rate ();
18425
18426 for (i = 0; i < issue_rate; i++)
18427 {
18428 sd_iterator_def sd_it;
18429 dep_t dep;
18430 rtx insn = group_insns[i];
18431
18432 if (!insn)
18433 continue;
18434
18435 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18436 {
18437 rtx next = DEP_CON (dep);
18438
18439 if (next == next_insn
18440 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18441 return true;
18442 }
18443 }
18444
18445 return false;
18446 }
18447
18448 /* Helper for the function redefine_groups.
18449 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18450 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18451 to keep it "far" (in a separate group) from GROUP_INSNS, according
18452 to one of the following schemes, depending on the value of the flag
18453 -minsert-sched-nops=X:
18454 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18455 in order to force NEXT_INSN into a separate group.
18456 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18457 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18458 insertion (has a group just ended, how many vacant issue slots remain in the
18459 last group, and how many dispatch groups were encountered so far). */
18460
18461 static int
18462 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
18463 rtx_insn *next_insn, bool *group_end, int can_issue_more,
18464 int *group_count)
18465 {
18466 rtx nop;
18467 bool force;
18468 int issue_rate = rs6000_issue_rate ();
18469 bool end = *group_end;
18470 int i;
18471
18472 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
18473 return can_issue_more;
18474
18475 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
18476 return can_issue_more;
18477
18478 force = is_costly_group (group_insns, next_insn);
18479 if (!force)
18480 return can_issue_more;
18481
18482 if (sched_verbose > 6)
18483 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
18484 *group_count, can_issue_more);
18485
18486 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
18487 {
18488 if (*group_end)
18489 can_issue_more = 0;
18490
18491 /* Since only a branch can be issued in the last issue_slot, it is
18492 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18493 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18494 in this case the last nop will start a new group and the branch
18495 will be forced to the new group. */
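 /* For example (illustrative): with can_issue_more == 3 and a
 non-branch next_insn, the decrement below leaves 2, so either two
 nops are emitted, or a single group-ending nop on Power6/7/8. */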
18496 if (can_issue_more && !is_branch_slot_insn (next_insn))
18497 can_issue_more--;
18498
18499 /* Do we have a special group ending nop? */
18500 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
18501 || rs6000_tune == PROCESSOR_POWER8)
18502 {
18503 nop = gen_group_ending_nop ();
18504 emit_insn_before (nop, next_insn);
18505 can_issue_more = 0;
18506 }
18507 else
18508 while (can_issue_more > 0)
18509 {
18510 nop = gen_nop ();
18511 emit_insn_before (nop, next_insn);
18512 can_issue_more--;
18513 }
18514
18515 *group_end = true;
18516 return 0;
18517 }
18518
18519 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
18520 {
18521 int n_nops = rs6000_sched_insert_nops;
18522
18523 /* Nops can't be issued from the branch slot, so the effective
18524 issue_rate for nops is 'issue_rate - 1'. */
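 /* Worked example (assuming issue_rate == 5, as on POWER4/5): at
 most four nops fit in one dispatch group, because the fifth slot
 is reserved for a branch; the slot accounting below reflects
 that. */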
18525 if (can_issue_more == 0)
18526 can_issue_more = issue_rate;
18527 can_issue_more--;
18528 if (can_issue_more == 0)
18529 {
18530 can_issue_more = issue_rate - 1;
18531 (*group_count)++;
18532 end = true;
18533 for (i = 0; i < issue_rate; i++)
18534 {
18535 group_insns[i] = 0;
18536 }
18537 }
18538
18539 while (n_nops > 0)
18540 {
18541 nop = gen_nop ();
18542 emit_insn_before (nop, next_insn);
18543 if (can_issue_more == issue_rate - 1) /* new group begins */
18544 end = false;
18545 can_issue_more--;
18546 if (can_issue_more == 0)
18547 {
18548 can_issue_more = issue_rate - 1;
18549 (*group_count)++;
18550 end = true;
18551 for (i = 0; i < issue_rate; i++)
18552 {
18553 group_insns[i] = 0;
18554 }
18555 }
18556 n_nops--;
18557 }
18558
18559 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
18560 can_issue_more++;
18561
18562 /* Is next_insn going to start a new group? */
18563 *group_end
18564 = (end
18565 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18566 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18567 || (can_issue_more < issue_rate &&
18568 insn_terminates_group_p (next_insn, previous_group)));
18569 if (*group_end && end)
18570 (*group_count)--;
18571
18572 if (sched_verbose > 6)
18573 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
18574 *group_count, can_issue_more);
18575 return can_issue_more;
18576 }
18577
18578 return can_issue_more;
18579 }
18580
18581 /* This function tries to synch the dispatch groups that the compiler "sees"
18582 with the dispatch groups that the processor dispatcher is expected to
18583 form in practice. It tries to achieve this synchronization by forcing the
18584 estimated processor grouping on the compiler (as opposed to the function
18585 'pad_groups', which tries to force the scheduler's grouping on the processor).
18586
18587 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
18588 examines the (estimated) dispatch groups that will be formed by the processor
18589 dispatcher. It marks these group boundaries to reflect the estimated
18590 processor grouping, overriding the grouping that the scheduler had marked.
18591 Depending on the value of the flag '-minsert-sched-nops' this function can
18592 force certain insns into separate groups or force a certain distance between
18593 them by inserting nops, for example, if there exists a "costly dependence"
18594 between the insns.
18595
18596 The function estimates the group boundaries that the processor will form as
18597 follows: It keeps track of how many vacant issue slots are available after
18598 each insn. A subsequent insn will start a new group if one of the following
18599 4 cases applies:
18600 - no more vacant issue slots remain in the current dispatch group.
18601 - only the last issue slot, which is the branch slot, is vacant, but the next
18602 insn is not a branch.
18603 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
18604 which means that a cracked insn (which occupies two issue slots) can't be
18605 issued in this group.
18606 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
18607 start a new group. */
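/* For instance (illustrative): with issue_rate == 5 and only two
 slots still vacant, one of them the branch slot, a cracked next
 insn needs two non-branch slots and so is estimated to start a new
 group (the third case above). */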
18608
18609 static int
18610 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18611 rtx_insn *tail)
18612 {
18613 rtx_insn *insn, *next_insn;
18614 int issue_rate;
18615 int can_issue_more;
18616 int slot, i;
18617 bool group_end;
18618 int group_count = 0;
18619 rtx *group_insns;
18620
18621 /* Initialize. */
18622 issue_rate = rs6000_issue_rate ();
18623 group_insns = XALLOCAVEC (rtx, issue_rate);
18624 for (i = 0; i < issue_rate; i++)
18625 {
18626 group_insns[i] = 0;
18627 }
18628 can_issue_more = issue_rate;
18629 slot = 0;
18630 insn = get_next_active_insn (prev_head_insn, tail);
18631 group_end = false;
18632
18633 while (insn != NULL_RTX)
18634 {
18635 slot = (issue_rate - can_issue_more);
18636 group_insns[slot] = insn;
18637 can_issue_more =
18638 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18639 if (insn_terminates_group_p (insn, current_group))
18640 can_issue_more = 0;
18641
18642 next_insn = get_next_active_insn (insn, tail);
18643 if (next_insn == NULL_RTX)
18644 return group_count + 1;
18645
18646 /* Is next_insn going to start a new group? */
18647 group_end
18648 = (can_issue_more == 0
18649 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18650 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18651 || (can_issue_more < issue_rate &&
18652 insn_terminates_group_p (next_insn, previous_group)));
18653
18654 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
18655 next_insn, &group_end, can_issue_more,
18656 &group_count);
18657
18658 if (group_end)
18659 {
18660 group_count++;
18661 can_issue_more = 0;
18662 for (i = 0; i < issue_rate; i++)
18663 {
18664 group_insns[i] = 0;
18665 }
18666 }
18667
18668 if (GET_MODE (next_insn) == TImode && can_issue_more)
18669 PUT_MODE (next_insn, VOIDmode);
18670 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
18671 PUT_MODE (next_insn, TImode);
18672
18673 insn = next_insn;
18674 if (can_issue_more == 0)
18675 can_issue_more = issue_rate;
18676 } /* while */
18677
18678 return group_count;
18679 }
18680
18681 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
18682 dispatch group boundaries that the scheduler had marked. Pad with nops
18683 any dispatch groups which have vacant issue slots, in order to force the
18684 scheduler's grouping on the processor dispatcher. The function
18685 returns the number of dispatch groups found. */
18686
18687 static int
18688 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18689 rtx_insn *tail)
18690 {
18691 rtx_insn *insn, *next_insn;
18692 rtx nop;
18693 int issue_rate;
18694 int can_issue_more;
18695 int group_end;
18696 int group_count = 0;
18697
18698 /* Initialize issue_rate. */
18699 issue_rate = rs6000_issue_rate ();
18700 can_issue_more = issue_rate;
18701
18702 insn = get_next_active_insn (prev_head_insn, tail);
18703 next_insn = get_next_active_insn (insn, tail);
18704
18705 while (insn != NULL_RTX)
18706 {
18707 can_issue_more =
18708 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18709
18710 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
18711
18712 if (next_insn == NULL_RTX)
18713 break;
18714
18715 if (group_end)
18716 {
18717 /* If the scheduler had marked group termination at this location
18718 (between insn and next_insn), and neither insn nor next_insn will
18719 force group termination, pad the group with nops to force group
18720 termination. */
18721 if (can_issue_more
18722 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
18723 && !insn_terminates_group_p (insn, current_group)
18724 && !insn_terminates_group_p (next_insn, previous_group))
18725 {
18726 if (!is_branch_slot_insn (next_insn))
18727 can_issue_more--;
18728
18729 while (can_issue_more)
18730 {
18731 nop = gen_nop ();
18732 emit_insn_before (nop, next_insn);
18733 can_issue_more--;
18734 }
18735 }
18736
18737 can_issue_more = issue_rate;
18738 group_count++;
18739 }
18740
18741 insn = next_insn;
18742 next_insn = get_next_active_insn (insn, tail);
18743 }
18744
18745 return group_count;
18746 }
18747
18748 /* We're beginning a new block. Initialize data structures as necessary. */
18749
18750 static void
18751 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
18752 int sched_verbose ATTRIBUTE_UNUSED,
18753 int max_ready ATTRIBUTE_UNUSED)
18754 {
18755 last_scheduled_insn = NULL;
18756 load_store_pendulum = 0;
18757 divide_cnt = 0;
18758 vec_pairing = 0;
18759 }
18760
18761 /* The following function is called at the end of scheduling BB.
18762 After reload, it inserts nops to enforce insn group bundling. */
18763
18764 static void
18765 rs6000_sched_finish (FILE *dump, int sched_verbose)
18766 {
18767 int n_groups;
18768
18769 if (sched_verbose)
18770 fprintf (dump, "=== Finishing schedule.\n");
18771
18772 if (reload_completed && rs6000_sched_groups)
18773 {
18774 /* Do not run the sched_finish hook when selective scheduling is enabled. */
18775 if (sel_sched_p ())
18776 return;
18777
18778 if (rs6000_sched_insert_nops == sched_finish_none)
18779 return;
18780
18781 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
18782 n_groups = pad_groups (dump, sched_verbose,
18783 current_sched_info->prev_head,
18784 current_sched_info->next_tail);
18785 else
18786 n_groups = redefine_groups (dump, sched_verbose,
18787 current_sched_info->prev_head,
18788 current_sched_info->next_tail);
18789
18790 if (sched_verbose >= 6)
18791 {
18792 fprintf (dump, "ngroups = %d\n", n_groups);
18793 print_rtl (dump, current_sched_info->prev_head);
18794 fprintf (dump, "Done finish_sched\n");
18795 }
18796 }
18797 }
18798
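/* Scheduling state saved and restored by the sched-context hooks
 below; the fields mirror the file-scope scheduling variables of
 the same names. */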
18799 struct rs6000_sched_context
18800 {
18801 short cached_can_issue_more;
18802 rtx_insn *last_scheduled_insn;
18803 int load_store_pendulum;
18804 int divide_cnt;
18805 int vec_pairing;
18806 };
18807
18808 typedef struct rs6000_sched_context rs6000_sched_context_def;
18809 typedef rs6000_sched_context_def *rs6000_sched_context_t;
18810
18811 /* Allocate storage for a new scheduling context. */
18812 static void *
18813 rs6000_alloc_sched_context (void)
18814 {
18815 return xmalloc (sizeof (rs6000_sched_context_def));
18816 }
18817
18818 /* If CLEAN_P is true, initialize _SC with clean data;
18819 otherwise, initialize it from the global context. */
18820 static void
18821 rs6000_init_sched_context (void *_sc, bool clean_p)
18822 {
18823 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18824
18825 if (clean_p)
18826 {
18827 sc->cached_can_issue_more = 0;
18828 sc->last_scheduled_insn = NULL;
18829 sc->load_store_pendulum = 0;
18830 sc->divide_cnt = 0;
18831 sc->vec_pairing = 0;
18832 }
18833 else
18834 {
18835 sc->cached_can_issue_more = cached_can_issue_more;
18836 sc->last_scheduled_insn = last_scheduled_insn;
18837 sc->load_store_pendulum = load_store_pendulum;
18838 sc->divide_cnt = divide_cnt;
18839 sc->vec_pairing = vec_pairing;
18840 }
18841 }
18842
18843 /* Sets the global scheduling context to the one pointed to by _SC. */
18844 static void
18845 rs6000_set_sched_context (void *_sc)
18846 {
18847 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18848
18849 gcc_assert (sc != NULL);
18850
18851 cached_can_issue_more = sc->cached_can_issue_more;
18852 last_scheduled_insn = sc->last_scheduled_insn;
18853 load_store_pendulum = sc->load_store_pendulum;
18854 divide_cnt = sc->divide_cnt;
18855 vec_pairing = sc->vec_pairing;
18856 }
18857
18858 /* Free _SC. */
18859 static void
18860 rs6000_free_sched_context (void *_sc)
18861 {
18862 gcc_assert (_sc != NULL);
18863
18864 free (_sc);
18865 }
18866
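/* Implement the TARGET_SCHED_CAN_SPECULATE_INSN hook: do not
 speculate the long-latency divide and square-root insns listed
 below. */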
18867 static bool
18868 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18869 {
18870 switch (get_attr_type (insn))
18871 {
18872 case TYPE_DIV:
18873 case TYPE_SDIV:
18874 case TYPE_DDIV:
18875 case TYPE_VECDIV:
18876 case TYPE_SSQRT:
18877 case TYPE_DSQRT:
18878 return false;
18879
18880 default:
18881 return true;
18882 }
18883 }
18884 \f
18885 /* Length in bytes of the trampoline for entering a nested function. */
18886
18887 int
18888 rs6000_trampoline_size (void)
18889 {
18890 int ret = 0;
18891
18892 switch (DEFAULT_ABI)
18893 {
18894 default:
18895 gcc_unreachable ();
18896
18897 case ABI_AIX:
18898 ret = (TARGET_32BIT) ? 12 : 24;
18899 break;
18900
18901 case ABI_ELFv2:
18902 gcc_assert (!TARGET_32BIT);
18903 ret = 32;
18904 break;
18905
18906 case ABI_DARWIN:
18907 case ABI_V4:
18908 ret = (TARGET_32BIT) ? 40 : 48;
18909 break;
18910 }
18911
18912 return ret;
18913 }
18914
18915 /* Emit RTL insns to initialize the variable parts of a trampoline.
18916 FNADDR is an RTX for the address of the function's pure code.
18917 CXT is an RTX for the static chain value for the function. */
18918
18919 static void
18920 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
18921 {
18922 int regsize = (TARGET_32BIT) ? 4 : 8;
18923 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
18924 rtx ctx_reg = force_reg (Pmode, cxt);
18925 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
18926
18927 switch (DEFAULT_ABI)
18928 {
18929 default:
18930 gcc_unreachable ();
18931
18932 /* Under AIX, just build the 3-word function descriptor. */
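 /* Descriptor layout built below, one word (regsize bytes) each:
 word 0: address of the function's code
 word 1: TOC (r2) value
 word 2: static chain value, conventionally picked up through r11
 by the indirect-call sequence. */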
18933 case ABI_AIX:
18934 {
18935 rtx fnmem, fn_reg, toc_reg;
18936
18937 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
18938 error ("you cannot take the address of a nested function if you use "
18939 "the %qs option", "-mno-pointers-to-nested-functions");
18940
18941 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
18942 fn_reg = gen_reg_rtx (Pmode);
18943 toc_reg = gen_reg_rtx (Pmode);
18944
18945 /* Macro to shorten the code expansions below. */
18946 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
18947
18948 m_tramp = replace_equiv_address (m_tramp, addr);
18949
18950 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
18951 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
18952 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
18953 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
18954 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
18955
18956 # undef MEM_PLUS
18957 }
18958 break;
18959
18960 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
18961 case ABI_ELFv2:
18962 case ABI_DARWIN:
18963 case ABI_V4:
18964 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
18965 LCT_NORMAL, VOIDmode,
18966 addr, Pmode,
18967 GEN_INT (rs6000_trampoline_size ()), SImode,
18968 fnaddr, Pmode,
18969 ctx_reg, Pmode);
18970 break;
18971 }
18972 }
18973
18974 \f
18975 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
18976 identifier as an argument, so the front end shouldn't look it up. */
18977
18978 static bool
18979 rs6000_attribute_takes_identifier_p (const_tree attr_id)
18980 {
18981 return is_attribute_p ("altivec", attr_id);
18982 }
18983
18984 /* Handle the "altivec" attribute. The attribute may have
18985 arguments as follows:
18986
18987 __attribute__((altivec(vector__)))
18988 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
18989 __attribute__((altivec(bool__))) (always followed by 'unsigned')
18990
18991 and may appear more than once (e.g., 'vector bool char') in a
18992 given declaration. */
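/* E.g. (illustrative): 'vector bool short' arrives here as the bool__
 attribute applied to 'unsigned short' (HImode), which the 'b' case
 below maps to bool_V8HI_type_node. */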
18993
18994 static tree
18995 rs6000_handle_altivec_attribute (tree *node,
18996 tree name ATTRIBUTE_UNUSED,
18997 tree args,
18998 int flags ATTRIBUTE_UNUSED,
18999 bool *no_add_attrs)
19000 {
19001 tree type = *node, result = NULL_TREE;
19002 machine_mode mode;
19003 int unsigned_p;
19004 char altivec_type
19005 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19006 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19007 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19008 : '?');
19009
19010 while (POINTER_TYPE_P (type)
19011 || TREE_CODE (type) == FUNCTION_TYPE
19012 || TREE_CODE (type) == METHOD_TYPE
19013 || TREE_CODE (type) == ARRAY_TYPE)
19014 type = TREE_TYPE (type);
19015
19016 mode = TYPE_MODE (type);
19017
19018 /* Check for invalid AltiVec type qualifiers. */
19019 if (type == long_double_type_node)
19020 error ("use of %<long double%> in AltiVec types is invalid");
19021 else if (type == boolean_type_node)
19022 error ("use of boolean types in AltiVec types is invalid");
19023 else if (TREE_CODE (type) == COMPLEX_TYPE)
19024 error ("use of %<complex%> in AltiVec types is invalid");
19025 else if (DECIMAL_FLOAT_MODE_P (mode))
19026 error ("use of decimal floating point types in AltiVec types is invalid");
19027 else if (!TARGET_VSX)
19028 {
19029 if (type == long_unsigned_type_node || type == long_integer_type_node)
19030 {
19031 if (TARGET_64BIT)
19032 error ("use of %<long%> in AltiVec types is invalid for "
19033 "64-bit code without %qs", "-mvsx");
19034 else if (rs6000_warn_altivec_long)
19035 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19036 "use %<int%>");
19037 }
19038 else if (type == long_long_unsigned_type_node
19039 || type == long_long_integer_type_node)
19040 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19041 "-mvsx");
19042 else if (type == double_type_node)
19043 error ("use of %<double%> in AltiVec types is invalid without %qs",
19044 "-mvsx");
19045 }
19046
19047 switch (altivec_type)
19048 {
19049 case 'v':
19050 unsigned_p = TYPE_UNSIGNED (type);
19051 switch (mode)
19052 {
19053 case E_TImode:
19054 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19055 break;
19056 case E_DImode:
19057 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19058 break;
19059 case E_SImode:
19060 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19061 break;
19062 case E_HImode:
19063 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19064 break;
19065 case E_QImode:
19066 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19067 break;
19068 case E_SFmode: result = V4SF_type_node; break;
19069 case E_DFmode: result = V2DF_type_node; break;
19070 /* If the user says 'vector int bool', we may be handed the 'bool'
19071 attribute _before_ the 'vector' attribute, and so select the
19072 proper type in the 'b' case below. */
19073 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19074 case E_V2DImode: case E_V2DFmode:
19075 result = type;
19076 default: break;
19077 }
19078 break;
19079 case 'b':
19080 switch (mode)
19081 {
19082 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19083 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19084 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19085 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19086 default: break;
19087 }
19088 break;
19089 case 'p':
19090 switch (mode)
19091 {
19092 case E_V8HImode: result = pixel_V8HI_type_node;
19093 default: break;
19094 }
19095 default: break;
19096 }
19097
19098 /* Propagate qualifiers attached to the element type
19099 onto the vector type. */
19100 if (result && result != type && TYPE_QUALS (type))
19101 result = build_qualified_type (result, TYPE_QUALS (type));
19102
19103 *no_add_attrs = true; /* No need to hang on to the attribute. */
19104
19105 if (result)
19106 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19107
19108 return NULL_TREE;
19109 }
19110
19111 /* AltiVec defines five built-in scalar types that serve as vector
19112 elements; we must teach the compiler how to mangle them. The 128-bit
19113 floating point mangling is target-specific as well. */
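/* E.g. (illustrative): the element type of 'vector bool int' mangles
 as "U6__booli" below, and IEEE 128-bit float mangles as
 "u9__ieee128" unless GCC 8.1 compatibility is in effect. */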
19114
19115 static const char *
19116 rs6000_mangle_type (const_tree type)
19117 {
19118 type = TYPE_MAIN_VARIANT (type);
19119
19120 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19121 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19122 return NULL;
19123
19124 if (type == bool_char_type_node) return "U6__boolc";
19125 if (type == bool_short_type_node) return "U6__bools";
19126 if (type == pixel_type_node) return "u7__pixel";
19127 if (type == bool_int_type_node) return "U6__booli";
19128 if (type == bool_long_long_type_node) return "U6__boolx";
19129
19130 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19131 return "g";
19132 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19133 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19134
19135 /* For all other types, use the default mangling. */
19136 return NULL;
19137 }
19138
19139 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19140 struct attribute_spec.handler. */
19141
19142 static tree
19143 rs6000_handle_longcall_attribute (tree *node, tree name,
19144 tree args ATTRIBUTE_UNUSED,
19145 int flags ATTRIBUTE_UNUSED,
19146 bool *no_add_attrs)
19147 {
19148 if (TREE_CODE (*node) != FUNCTION_TYPE
19149 && TREE_CODE (*node) != FIELD_DECL
19150 && TREE_CODE (*node) != TYPE_DECL)
19151 {
19152 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19153 name);
19154 *no_add_attrs = true;
19155 }
19156
19157 return NULL_TREE;
19158 }
19159
19160 /* Set longcall attributes on all functions declared when
19161 rs6000_default_long_calls is true. */
19162 static void
19163 rs6000_set_default_type_attributes (tree type)
19164 {
19165 if (rs6000_default_long_calls
19166 && (TREE_CODE (type) == FUNCTION_TYPE
19167 || TREE_CODE (type) == METHOD_TYPE))
19168 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19169 NULL_TREE,
19170 TYPE_ATTRIBUTES (type));
19171
19172 #if TARGET_MACHO
19173 darwin_set_default_type_attributes (type);
19174 #endif
19175 }
19176
19177 /* Return a reference suitable for calling a function with the
19178 longcall attribute. */
19179
19180 static rtx
19181 rs6000_longcall_ref (rtx call_ref, rtx arg)
19182 {
19183 /* System V adds '.' to the internal name, so skip them. */
19184 const char *call_name = XSTR (call_ref, 0);
19185 if (*call_name == '.')
19186 {
19187 while (*call_name == '.')
19188 call_name++;
19189
19190 tree node = get_identifier (call_name);
19191 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19192 }
19193
19194 if (TARGET_PLTSEQ)
19195 {
19196 rtx base = const0_rtx;
19197 int regno = 12;
19198 if (rs6000_pcrel_p (cfun))
19199 {
19200 rtx reg = gen_rtx_REG (Pmode, regno);
19201 rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19202 UNSPEC_PLT_PCREL);
19203 emit_insn (gen_rtx_SET (reg, u));
19204 return reg;
19205 }
19206
19207 if (DEFAULT_ABI == ABI_ELFv2)
19208 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19209 else
19210 {
19211 if (flag_pic)
19212 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19213 regno = 11;
19214 }
19215 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19216 may be used by a function global entry point. For SysV4, r11
19217 is used by __glink_PLTresolve lazy resolver entry. */
19218 rtx reg = gen_rtx_REG (Pmode, regno);
19219 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19220 UNSPEC_PLT16_HA);
19221 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
19222 UNSPEC_PLT16_LO);
19223 emit_insn (gen_rtx_SET (reg, hi));
19224 emit_insn (gen_rtx_SET (reg, lo));
19225 return reg;
19226 }
19227
19228 return force_reg (Pmode, call_ref);
19229 }
19230 \f
19231 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19232 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19233 #endif
19234
19235 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19236 struct attribute_spec.handler. */
19237 static tree
19238 rs6000_handle_struct_attribute (tree *node, tree name,
19239 tree args ATTRIBUTE_UNUSED,
19240 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19241 {
19242 tree *type = NULL;
19243 if (DECL_P (*node))
19244 {
19245 if (TREE_CODE (*node) == TYPE_DECL)
19246 type = &TREE_TYPE (*node);
19247 }
19248 else
19249 type = node;
19250
19251 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19252 || TREE_CODE (*type) == UNION_TYPE)))
19253 {
19254 warning (OPT_Wattributes, "%qE attribute ignored", name);
19255 *no_add_attrs = true;
19256 }
19257
19258 else if ((is_attribute_p ("ms_struct", name)
19259 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19260 || ((is_attribute_p ("gcc_struct", name)
19261 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19262 {
19263 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
19264 name);
19265 *no_add_attrs = true;
19266 }
19267
19268 return NULL_TREE;
19269 }
19270
19271 static bool
19272 rs6000_ms_bitfield_layout_p (const_tree record_type)
19273 {
19274 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
19275 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19276 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19277 }
19278 \f
19279 #ifdef USING_ELFOS_H
19280
19281 /* A get_unnamed_section callback, used for switching to toc_section. */
19282
19283 static void
19284 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19285 {
19286 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19287 && TARGET_MINIMAL_TOC)
19288 {
19289 if (!toc_initialized)
19290 {
19291 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19292 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19293 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
19294 fprintf (asm_out_file, "\t.tc ");
19295 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
19296 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19297 fprintf (asm_out_file, "\n");
19298
19299 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19300 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19301 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19302 fprintf (asm_out_file, " = .+32768\n");
19303 toc_initialized = 1;
19304 }
19305 else
19306 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19307 }
19308 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19309 {
19310 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19311 if (!toc_initialized)
19312 {
19313 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19314 toc_initialized = 1;
19315 }
19316 }
19317 else
19318 {
19319 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19320 if (!toc_initialized)
19321 {
19322 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19323 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19324 fprintf (asm_out_file, " = .+32768\n");
19325 toc_initialized = 1;
19326 }
19327 }
19328 }
19329
19330 /* Implement TARGET_ASM_INIT_SECTIONS. */
19331
19332 static void
19333 rs6000_elf_asm_init_sections (void)
19334 {
19335 toc_section
19336 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19337
19338 sdata2_section
19339 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19340 SDATA2_SECTION_ASM_OP);
19341 }
19342
19343 /* Implement TARGET_SELECT_RTX_SECTION. */
19344
19345 static section *
19346 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19347 unsigned HOST_WIDE_INT align)
19348 {
19349 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19350 return toc_section;
19351 else
19352 return default_elf_select_rtx_section (mode, x, align);
19353 }
19354 \f
19355 /* For a SYMBOL_REF, set generic flags and then perform some
19356 target-specific processing.
19357
19358 When the AIX ABI is requested on a non-AIX system, replace the
19359 function name with the real name (with a leading .) rather than the
19360 function descriptor name. This saves a lot of overriding code to
19361 read the prefixes. */
19362
19363 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19364 static void
19365 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19366 {
19367 default_encode_section_info (decl, rtl, first);
19368
19369 if (first
19370 && TREE_CODE (decl) == FUNCTION_DECL
19371 && !TARGET_AIX
19372 && DEFAULT_ABI == ABI_AIX)
19373 {
19374 rtx sym_ref = XEXP (rtl, 0);
19375 size_t len = strlen (XSTR (sym_ref, 0));
19376 char *str = XALLOCAVEC (char, len + 2);
19377 str[0] = '.';
19378 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19379 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
19380 }
19381 }
19382
19383 static inline bool
19384 compare_section_name (const char *section, const char *templ)
19385 {
19386 int len;
19387
19388 len = strlen (templ);
19389 return (strncmp (section, templ, len) == 0
19390 && (section[len] == 0 || section[len] == '.'));
19391 }
19392
19393 bool
19394 rs6000_elf_in_small_data_p (const_tree decl)
19395 {
19396 if (rs6000_sdata == SDATA_NONE)
19397 return false;
19398
19399 /* We want to merge strings, so we never consider them small data. */
19400 if (TREE_CODE (decl) == STRING_CST)
19401 return false;
19402
19403 /* Functions are never in the small data area. */
19404 if (TREE_CODE (decl) == FUNCTION_DECL)
19405 return false;
19406
19407 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
19408 {
19409 const char *section = DECL_SECTION_NAME (decl);
19410 if (compare_section_name (section, ".sdata")
19411 || compare_section_name (section, ".sdata2")
19412 || compare_section_name (section, ".gnu.linkonce.s")
19413 || compare_section_name (section, ".sbss")
19414 || compare_section_name (section, ".sbss2")
19415 || compare_section_name (section, ".gnu.linkonce.sb")
19416 || strcmp (section, ".PPC.EMB.sdata0") == 0
19417 || strcmp (section, ".PPC.EMB.sbss0") == 0)
19418 return true;
19419 }
19420 else
19421 {
19422 /* If we are told not to put readonly data in sdata, then don't. */
19423 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
19424 && !rs6000_readonly_in_sdata)
19425 return false;
19426
19427 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
19428
19429 if (size > 0
19430 && size <= g_switch_value
19431 /* If it's not public, and we're not going to reference it there,
19432 there's no need to put it in the small data section. */
19433 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
19434 return true;
19435 }
19436
19437 return false;
19438 }
19439
19440 #endif /* USING_ELFOS_H */
19441 \f
19442 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19443
19444 static bool
19445 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19446 {
19447 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19448 }
19449
19450 /* Do not place thread-local symbols refs in the object blocks. */
19451
19452 static bool
19453 rs6000_use_blocks_for_decl_p (const_tree decl)
19454 {
19455 return !DECL_THREAD_LOCAL_P (decl);
19456 }
19457 \f
19458 /* Return a REG that occurs in ADDR with coefficient 1.
19459 ADDR can be effectively incremented by incrementing REG.
19460
19461 r0 is special and we must not select it as an address
19462 register by this routine since our caller will try to
19463 increment the returned register via an "la" instruction. */
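/* E.g. (illustrative): for ADDR = (plus (reg 9) (const_int 16)) this
 returns (reg 9), which the caller can then advance with
 "la 9,16(9)". */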
19464
19465 rtx
19466 find_addr_reg (rtx addr)
19467 {
19468 while (GET_CODE (addr) == PLUS)
19469 {
19470 if (REG_P (XEXP (addr, 0))
19471 && REGNO (XEXP (addr, 0)) != 0)
19472 addr = XEXP (addr, 0);
19473 else if (REG_P (XEXP (addr, 1))
19474 && REGNO (XEXP (addr, 1)) != 0)
19475 addr = XEXP (addr, 1);
19476 else if (CONSTANT_P (XEXP (addr, 0)))
19477 addr = XEXP (addr, 1);
19478 else if (CONSTANT_P (XEXP (addr, 1)))
19479 addr = XEXP (addr, 0);
19480 else
19481 gcc_unreachable ();
19482 }
19483 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19484 return addr;
19485 }
19486
19487 void
19488 rs6000_fatal_bad_address (rtx op)
19489 {
19490 fatal_insn ("bad address", op);
19491 }
19492
19493 #if TARGET_MACHO
19494
19495 vec<branch_island, va_gc> *branch_islands;
19496
19497 /* Remember to generate a branch island for far calls to the given
19498 function. */
19499
19500 static void
19501 add_compiler_branch_island (tree label_name, tree function_name,
19502 int line_number)
19503 {
19504 branch_island bi = {function_name, label_name, line_number};
19505 vec_safe_push (branch_islands, bi);
19506 }
19507
19508 /* NO_PREVIOUS_DEF checks whether the function name is already in
19509 the list of branch islands. */
19510
19511 static int
19512 no_previous_def (tree function_name)
19513 {
19514 branch_island *bi;
19515 unsigned ix;
19516
19517 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19518 if (function_name == bi->function_name)
19519 return 0;
19520 return 1;
19521 }
19522
19523 /* GET_PREV_LABEL gets the label name from the previous definition of
19524 the function. */
19525
19526 static tree
19527 get_prev_label (tree function_name)
19528 {
19529 branch_island *bi;
19530 unsigned ix;
19531
19532 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19533 if (function_name == bi->function_name)
19534 return bi->label_name;
19535 return NULL_TREE;
19536 }
19537
19538 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19539
19540 void
19541 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19542 {
19543 unsigned int length;
19544 char *symbol_name, *lazy_ptr_name;
19545 char *local_label_0;
19546 static unsigned label = 0;
19547
19548 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19549 symb = (*targetm.strip_name_encoding) (symb);
19550
19551 length = strlen (symb);
19552 symbol_name = XALLOCAVEC (char, length + 32);
19553 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19554
19555 lazy_ptr_name = XALLOCAVEC (char, length + 32);
19556 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
19557
19558 if (MACHOPIC_PURE)
19559 {
19560 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
19561 fprintf (file, "\t.align 5\n");
19562
19563 fprintf (file, "%s:\n", stub);
19564 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19565
19566 label++;
19567 local_label_0 = XALLOCAVEC (char, 16);
19568 sprintf (local_label_0, "L%u$spb", label);
19569
19570 fprintf (file, "\tmflr r0\n");
19571 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
19572 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
19573 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
19574 lazy_ptr_name, local_label_0);
19575 fprintf (file, "\tmtlr r0\n");
19576 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
19577 (TARGET_64BIT ? "ldu" : "lwzu"),
19578 lazy_ptr_name, local_label_0);
19579 fprintf (file, "\tmtctr r12\n");
19580 fprintf (file, "\tbctr\n");
19581 }
19582 else /* mdynamic-no-pic or mkernel. */
19583 {
19584 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
19585 fprintf (file, "\t.align 4\n");
19586
19587 fprintf (file, "%s:\n", stub);
19588 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19589
19590 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
19591 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
19592 (TARGET_64BIT ? "ldu" : "lwzu"),
19593 lazy_ptr_name);
19594 fprintf (file, "\tmtctr r12\n");
19595 fprintf (file, "\tbctr\n");
19596 }
19597
19598 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19599 fprintf (file, "%s:\n", lazy_ptr_name);
19600 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19601 fprintf (file, "%sdyld_stub_binding_helper\n",
19602 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
19603 }
19604
19605 /* Legitimize PIC addresses. If the address is already
19606 position-independent, we return ORIG. Newly generated
19607 position-independent addresses go into a reg. This is REG if
19608 nonzero, otherwise we allocate register(s) as necessary. */
19609
19610 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
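/* SMALL_INT is true iff X fits in a signed 16-bit immediate: adding
 0x8000 maps the acceptable range [-0x8000, 0x7fff] onto
 [0, 0xffff]. */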
19611
19612 rtx
19613 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
19614 rtx reg)
19615 {
19616 rtx base, offset;
19617
19618 if (reg == NULL && !reload_completed)
19619 reg = gen_reg_rtx (Pmode);
19620
19621 if (GET_CODE (orig) == CONST)
19622 {
19623 rtx reg_temp;
19624
19625 if (GET_CODE (XEXP (orig, 0)) == PLUS
19626 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
19627 return orig;
19628
19629 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
19630
19631 /* Use a different reg for the intermediate value, as
19632 it will be marked UNCHANGING. */
19633 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
19634 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
19635 Pmode, reg_temp);
19636 offset =
19637 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
19638 Pmode, reg);
19639
19640 if (CONST_INT_P (offset))
19641 {
19642 if (SMALL_INT (offset))
19643 return plus_constant (Pmode, base, INTVAL (offset));
19644 else if (!reload_completed)
19645 offset = force_reg (Pmode, offset);
19646 else
19647 {
19648 rtx mem = force_const_mem (Pmode, orig);
19649 return machopic_legitimize_pic_address (mem, Pmode, reg);
19650 }
19651 }
19652 return gen_rtx_PLUS (Pmode, base, offset);
19653 }
19654
19655 /* Fall back on generic machopic code. */
19656 return machopic_legitimize_pic_address (orig, mode, reg);
19657 }
19658
19659 /* Output a .machine directive for the Darwin assembler, and call
19660 the generic start_file routine. */
19661
19662 static void
19663 rs6000_darwin_file_start (void)
19664 {
19665 static const struct
19666 {
19667 const char *arg;
19668 const char *name;
19669 HOST_WIDE_INT if_set;
19670 } mapping[] = {
19671 { "ppc64", "ppc64", MASK_64BIT },
19672 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
19673 { "power4", "ppc970", 0 },
19674 { "G5", "ppc970", 0 },
19675 { "7450", "ppc7450", 0 },
19676 { "7400", "ppc7400", MASK_ALTIVEC },
19677 { "G4", "ppc7400", 0 },
19678 { "750", "ppc750", 0 },
19679 { "740", "ppc750", 0 },
19680 { "G3", "ppc750", 0 },
19681 { "604e", "ppc604e", 0 },
19682 { "604", "ppc604", 0 },
19683 { "603e", "ppc603", 0 },
19684 { "603", "ppc603", 0 },
19685 { "601", "ppc601", 0 },
19686 { NULL, "ppc", 0 } };
19687 const char *cpu_id = "";
19688 size_t i;
19689
19690 rs6000_file_start ();
19691 darwin_file_start ();
19692
19693 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19694
19695 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
19696 cpu_id = rs6000_default_cpu;
19697
19698 if (global_options_set.x_rs6000_cpu_index)
19699 cpu_id = processor_target_table[rs6000_cpu_index].name;
19700
19701 /* Look through the mapping array. Pick the first name that either
19702 matches the argument, has a bit set in IF_SET that is also set
19703 in the target flags, or has a NULL name. */
19704
19705 i = 0;
19706 while (mapping[i].arg != NULL
19707 && strcmp (mapping[i].arg, cpu_id) != 0
19708 && (mapping[i].if_set & rs6000_isa_flags) == 0)
19709 i++;
19710
19711 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
19712 }
19713
19714 #endif /* TARGET_MACHO */
19715
19716 #if TARGET_ELF
19717 static int
19718 rs6000_elf_reloc_rw_mask (void)
19719 {
19720 if (flag_pic)
19721 return 3;
19722 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19723 return 2;
19724 else
19725 return 0;
19726 }
19727
19728 /* Record an element in the table of global constructors. SYMBOL is
19729 a SYMBOL_REF of the function to be called; PRIORITY is a number
19730 between 0 and MAX_INIT_PRIORITY.
19731
19732 This differs from default_named_section_asm_out_constructor in
19733 that we have special handling for -mrelocatable. */
19734
19735 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19736 static void
19737 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19738 {
19739 const char *section = ".ctors";
19740 char buf[18];
19741
19742 if (priority != DEFAULT_INIT_PRIORITY)
19743 {
19744 sprintf (buf, ".ctors.%.5u",
19745 /* Invert the numbering so the linker puts us in the proper
19746 order; constructors are run from right to left, and the
19747 linker sorts in increasing order. */
19748 MAX_INIT_PRIORITY - priority);
19749 section = buf;
19750 }
19751
19752 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19753 assemble_align (POINTER_SIZE);
19754
19755 if (DEFAULT_ABI == ABI_V4
19756 && (TARGET_RELOCATABLE || flag_pic > 1))
19757 {
19758 fputs ("\t.long (", asm_out_file);
19759 output_addr_const (asm_out_file, symbol);
19760 fputs (")@fixup\n", asm_out_file);
19761 }
19762 else
19763 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19764 }
19765
19766 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19767 static void
19768 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19769 {
19770 const char *section = ".dtors";
19771 char buf[18];
19772
19773 if (priority != DEFAULT_INIT_PRIORITY)
19774 {
19775 sprintf (buf, ".dtors.%.5u",
19776 /* Invert the numbering so the linker puts us in the proper
19777 order; destructors are run from right to left, and the
19778 linker sorts in increasing order. */
19779 MAX_INIT_PRIORITY - priority);
19780 section = buf;
19781 }
19782
19783 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19784 assemble_align (POINTER_SIZE);
19785
19786 if (DEFAULT_ABI == ABI_V4
19787 && (TARGET_RELOCATABLE || flag_pic > 1))
19788 {
19789 fputs ("\t.long (", asm_out_file);
19790 output_addr_const (asm_out_file, symbol);
19791 fputs (")@fixup\n", asm_out_file);
19792 }
19793 else
19794 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19795 }
19796
19797 void
19798 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
19799 {
19800 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
19801 {
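    /* ELFv1: NAME labels a 24-byte function descriptor in .opd (entry
       address, TOC base, environment pointer), while the code itself
       gets the dot-symbol entry label emitted below.  */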
19802 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
19803 ASM_OUTPUT_LABEL (file, name);
19804 fputs (DOUBLE_INT_ASM_OP, file);
19805 rs6000_output_function_entry (file, name);
19806 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
19807 if (DOT_SYMBOLS)
19808 {
19809 fputs ("\t.size\t", file);
19810 assemble_name (file, name);
19811 fputs (",24\n\t.type\t.", file);
19812 assemble_name (file, name);
19813 fputs (",@function\n", file);
19814 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
19815 {
19816 fputs ("\t.globl\t.", file);
19817 assemble_name (file, name);
19818 putc ('\n', file);
19819 }
19820 }
19821 else
19822 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19823 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19824 rs6000_output_function_entry (file, name);
19825 fputs (":\n", file);
19826 return;
19827 }
19828
19829 int uses_toc;
19830 if (DEFAULT_ABI == ABI_V4
19831 && (TARGET_RELOCATABLE || flag_pic > 1)
19832 && !TARGET_SECURE_PLT
19833 && (!constant_pool_empty_p () || crtl->profile)
19834 && (uses_toc = uses_TOC ()))
19835 {
19836 char buf[256];
19837
19838 if (uses_toc == 2)
19839 switch_to_other_text_partition ();
19840 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19841
19842 fprintf (file, "\t.long ");
19843 assemble_name (file, toc_label_name);
19844 need_toc_init = 1;
19845 putc ('-', file);
19846 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19847 assemble_name (file, buf);
19848 putc ('\n', file);
19849 if (uses_toc == 2)
19850 switch_to_other_text_partition ();
19851 }
19852
19853 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19854 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19855
19856 if (TARGET_CMODEL == CMODEL_LARGE
19857 && rs6000_global_entry_point_prologue_needed_p ())
19858 {
19859 char buf[256];
19860
19861 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19862
19863 fprintf (file, "\t.quad .TOC.-");
19864 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19865 assemble_name (file, buf);
19866 putc ('\n', file);
19867 }
19868
19869 if (DEFAULT_ABI == ABI_AIX)
19870 {
19871 const char *desc_name, *orig_name;
19872
19873 orig_name = (*targetm.strip_name_encoding) (name);
19874 desc_name = orig_name;
19875 while (*desc_name == '.')
19876 desc_name++;
19877
19878 if (TREE_PUBLIC (decl))
19879 fprintf (file, "\t.globl %s\n", desc_name);
19880
19881 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19882 fprintf (file, "%s:\n", desc_name);
19883 fprintf (file, "\t.long %s\n", orig_name);
19884 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
19885 fputs ("\t.long 0\n", file);
19886 fprintf (file, "\t.previous\n");
19887 }
19888 ASM_OUTPUT_LABEL (file, name);
19889 }
19890
19891 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
19892 static void
19893 rs6000_elf_file_end (void)
19894 {
19895 #ifdef HAVE_AS_GNU_ATTRIBUTE
19896 /* ??? The value emitted depends on options active at file end.
19897 Assume anyone using #pragma or attributes that might change
19898 options knows what they are doing. */
19899 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
19900 && rs6000_passes_float)
19901 {
19902 int fp;
19903
19904 if (TARGET_HARD_FLOAT)
19905 fp = 1;
19906 else
19907 fp = 2;
19908 if (rs6000_passes_long_double)
19909 {
19910 if (!TARGET_LONG_DOUBLE_128)
19911 fp |= 2 * 4;
19912 else if (TARGET_IEEEQUAD)
19913 fp |= 3 * 4;
19914 else
19915 fp |= 1 * 4;
19916 }
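      /* FP now holds the Tag_GNU_Power_ABI_FP value: bits 0-1 give the
	 scalar float ABI (1 = hard, 2 = soft) and bits 2-3 the long
	 double format (1 = 128-bit IBM extended, 2 = 64-bit,
	 3 = IEEE 128-bit).  */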
19917 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
19918 }
19919 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
19920 {
19921 if (rs6000_passes_vector)
19922 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
19923 (TARGET_ALTIVEC_ABI ? 2 : 1));
19924 if (rs6000_returns_struct)
19925 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
19926 aix_struct_return ? 2 : 1);
19927 }
19928 #endif
19929 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
19930 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
19931 file_end_indicate_exec_stack ();
19932 #endif
19933
19934 if (flag_split_stack)
19935 file_end_indicate_split_stack ();
19936
19937 if (cpu_builtin_p)
19938 {
19939 /* We have expanded a CPU builtin, so we need to emit a reference to
19940 the special symbol that LIBC uses to declare that it supports
19941 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 fields in the TCB. */
19942 switch_to_section (data_section);
19943 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
19944 fprintf (asm_out_file, "\t%s %s\n",
19945 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
19946 }
19947 }
19948 #endif
19949
19950 #if TARGET_XCOFF
19951
19952 #ifndef HAVE_XCOFF_DWARF_EXTRAS
19953 #define HAVE_XCOFF_DWARF_EXTRAS 0
19954 #endif
19955
19956 static enum unwind_info_type
19957 rs6000_xcoff_debug_unwind_info (void)
19958 {
19959 return UI_NONE;
19960 }
19961
19962 static void
19963 rs6000_xcoff_asm_output_anchor (rtx symbol)
19964 {
19965 char buffer[100];
19966
19967 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
19968 SYMBOL_REF_BLOCK_OFFSET (symbol));
19969 fprintf (asm_out_file, "%s", SET_ASM_OP);
19970 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
19971 fprintf (asm_out_file, ",");
19972 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
19973 fprintf (asm_out_file, "\n");
19974 }
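/* E.g., an anchor placed 16 bytes into its object block is emitted as
   ".set sym,$ + 16", defining it relative to the current location
   counter.  */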
19975
19976 static void
19977 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
19978 {
19979 fputs (GLOBAL_ASM_OP, stream);
19980 RS6000_OUTPUT_BASENAME (stream, name);
19981 putc ('\n', stream);
19982 }
19983
19984 /* A get_unnamed_section callback, used for read-only sections.
19985 DIRECTIVE points to the section string variable. */
19986
19987 static void
19988 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
19989 {
19990 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
19991 *(const char *const *) directive,
19992 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
19993 }
19994
19995 /* Likewise for read-write sections. */
19996
19997 static void
19998 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
19999 {
20000 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20001 *(const char *const *) directive,
20002 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20003 }
20004
20005 static void
20006 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20007 {
20008 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20009 *(const char *const *) directive,
20010 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20011 }
20012
20013 /* A get_unnamed_section callback, used for switching to toc_section. */
20014
20015 static void
20016 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20017 {
20018 if (TARGET_MINIMAL_TOC)
20019 {
20020 /* toc_section is always selected at least once from
20021 rs6000_xcoff_file_start, so this is guaranteed to be
20022 defined exactly once in each file. */
20023 if (!toc_initialized)
20024 {
20025 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20026 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20027 toc_initialized = 1;
20028 }
20029 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20030 (TARGET_32BIT ? "" : ",3"));
20031 }
20032 else
20033 fputs ("\t.toc\n", asm_out_file);
20034 }
20035
20036 /* Implement TARGET_ASM_INIT_SECTIONS. */
20037
20038 static void
20039 rs6000_xcoff_asm_init_sections (void)
20040 {
20041 read_only_data_section
20042 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20043 &xcoff_read_only_section_name);
20044
20045 private_data_section
20046 = get_unnamed_section (SECTION_WRITE,
20047 rs6000_xcoff_output_readwrite_section_asm_op,
20048 &xcoff_private_data_section_name);
20049
20050 read_only_private_data_section
20051 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20052 &xcoff_private_rodata_section_name);
20053
20054 tls_data_section
20055 = get_unnamed_section (SECTION_TLS,
20056 rs6000_xcoff_output_tls_section_asm_op,
20057 &xcoff_tls_data_section_name);
20058
20059 tls_private_data_section
20060 = get_unnamed_section (SECTION_TLS,
20061 rs6000_xcoff_output_tls_section_asm_op,
20062 &xcoff_private_data_section_name);
20063
20064 toc_section
20065 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20066
20067 readonly_data_section = read_only_data_section;
20068 }
20069
20070 static int
20071 rs6000_xcoff_reloc_rw_mask (void)
20072 {
20073 return 3;
20074 }
20075
20076 static void
20077 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20078 tree decl ATTRIBUTE_UNUSED)
20079 {
20080 int smclass;
20081 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20082
20083 if (flags & SECTION_EXCLUDE)
20084 smclass = 4;
20085 else if (flags & SECTION_DEBUG)
20086 {
20087 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20088 return;
20089 }
20090 else if (flags & SECTION_CODE)
20091 smclass = 0;
20092 else if (flags & SECTION_TLS)
20093 smclass = 3;
20094 else if (flags & SECTION_WRITE)
20095 smclass = 2;
20096 else
20097 smclass = 1;
20098
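  /* E.g., a writable named section "foo" whose stored log2 alignment
     is 3 comes out as "\t.csect foo[RW],3"; code sections also gain a
     leading dot on the csect name.  */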
20099 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20100 (flags & SECTION_CODE) ? "." : "",
20101 name, suffix[smclass], flags & SECTION_ENTSIZE);
20102 }
20103
20104 #define IN_NAMED_SECTION(DECL) \
20105 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20106 && DECL_SECTION_NAME (DECL) != NULL)
20107
20108 static section *
20109 rs6000_xcoff_select_section (tree decl, int reloc,
20110 unsigned HOST_WIDE_INT align)
20111 {
20112 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20113 a named section. */
20114 if (align > BIGGEST_ALIGNMENT)
20115 {
20116 resolve_unique_section (decl, reloc, true);
20117 if (IN_NAMED_SECTION (decl))
20118 return get_named_section (decl, NULL, reloc);
20119 }
20120
20121 if (decl_readonly_section (decl, reloc))
20122 {
20123 if (TREE_PUBLIC (decl))
20124 return read_only_data_section;
20125 else
20126 return read_only_private_data_section;
20127 }
20128 else
20129 {
20130 #if HAVE_AS_TLS
20131 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20132 {
20133 if (TREE_PUBLIC (decl))
20134 return tls_data_section;
20135 else if (bss_initializer_p (decl))
20136 {
20137 /* Convert to COMMON to emit in BSS. */
20138 DECL_COMMON (decl) = 1;
20139 return tls_comm_section;
20140 }
20141 else
20142 return tls_private_data_section;
20143 }
20144 else
20145 #endif
20146 if (TREE_PUBLIC (decl))
20147 return data_section;
20148 else
20149 return private_data_section;
20150 }
20151 }
20152
20153 static void
20154 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20155 {
20156 const char *name;
20157
20158 /* Use select_section for private data and uninitialized data with
20159 alignment <= BIGGEST_ALIGNMENT. */
20160 if (!TREE_PUBLIC (decl)
20161 || DECL_COMMON (decl)
20162 || (DECL_INITIAL (decl) == NULL_TREE
20163 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20164 || DECL_INITIAL (decl) == error_mark_node
20165 || (flag_zero_initialized_in_bss
20166 && initializer_zerop (DECL_INITIAL (decl))))
20167 return;
20168
20169 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20170 name = (*targetm.strip_name_encoding) (name);
20171 set_decl_section_name (decl, name);
20172 }
20173
20174 /* Select section for constant in constant pool.
20175
20176 On RS/6000, all constants are in the private read-only data area.
20177 However, if this is being placed in the TOC it must be output as a
20178 toc entry. */
20179
20180 static section *
20181 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20182 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20183 {
20184 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20185 return toc_section;
20186 else
20187 return read_only_private_data_section;
20188 }
20189
20190 /* Remove any trailing [DS] or the like from the symbol name. */
20191
20192 static const char *
20193 rs6000_xcoff_strip_name_encoding (const char *name)
20194 {
20195 size_t len;
20196 if (*name == '*')
20197 name++;
20198 len = strlen (name);
20199 if (name[len - 1] == ']')
20200 return ggc_alloc_string (name, len - 4);
20201 else
20202 return name;
20203 }
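/* For example, "foo[DS]" and "foo[RW]" both strip to "foo"; each of the
   mapping-class suffixes used here is four characters counting the
   brackets, hence the fixed len - 4.  */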
20204
20205 /* Section attributes. AIX is always PIC. */
20206
20207 static unsigned int
20208 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20209 {
20210 unsigned int align;
20211 unsigned int flags = default_section_type_flags (decl, name, reloc);
20212
20213 /* Align to at least UNIT size. */
20214 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20215 align = MIN_UNITS_PER_WORD;
20216 else
20217 /* Increase alignment of large objects if not already stricter. */
20218 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20219 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20220 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
20221
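  /* The log2 alignment travels in the SECTION_ENTSIZE bits and is
     read back in rs6000_xcoff_asm_named_section when the .csect
     directive is emitted.  */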
20222 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
20223 }
20224
20225 /* Output at beginning of assembler file.
20226
20227 Initialize the section names for the RS/6000 at this point.
20228
20229 Specify the filename, including the full path, to the assembler.
20230
20231 We want to go into the TOC section so at least one .toc will be emitted.
20232 Also, in order to output proper .bs/.es pairs, we need at least one static
20233 [RW] section emitted.
20234
20235 Finally, declare mcount when profiling to make the assembler happy. */
20236
20237 static void
20238 rs6000_xcoff_file_start (void)
20239 {
20240 rs6000_gen_section_name (&xcoff_bss_section_name,
20241 main_input_filename, ".bss_");
20242 rs6000_gen_section_name (&xcoff_private_data_section_name,
20243 main_input_filename, ".rw_");
20244 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20245 main_input_filename, ".rop_");
20246 rs6000_gen_section_name (&xcoff_read_only_section_name,
20247 main_input_filename, ".ro_");
20248 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20249 main_input_filename, ".tls_");
20250 rs6000_gen_section_name (&xcoff_tbss_section_name,
20251 main_input_filename, ".tbss_[UL]");
20252
20253 fputs ("\t.file\t", asm_out_file);
20254 output_quoted_string (asm_out_file, main_input_filename);
20255 fputc ('\n', asm_out_file);
20256 if (write_symbols != NO_DEBUG)
20257 switch_to_section (private_data_section);
20258 switch_to_section (toc_section);
20259 switch_to_section (text_section);
20260 if (profile_flag)
20261 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20262 rs6000_file_start ();
20263 }
20264
20265 /* Output at end of assembler file.
20266 On the RS/6000, referencing data should automatically pull in text. */
20267
20268 static void
20269 rs6000_xcoff_file_end (void)
20270 {
20271 switch_to_section (text_section);
20272 fputs ("_section_.text:\n", asm_out_file);
20273 switch_to_section (data_section);
20274 fputs (TARGET_32BIT
20275 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20276 asm_out_file);
20277 }
20278
20279 struct declare_alias_data
20280 {
20281 FILE *file;
20282 bool function_descriptor;
20283 };
20284
20285 /* Declare alias N. A helper callback for call_for_symbol_and_aliases. */
20286
20287 static bool
20288 rs6000_declare_alias (struct symtab_node *n, void *d)
20289 {
20290 struct declare_alias_data *data = (struct declare_alias_data *)d;
20291 /* The main symbol is output specially, because the varasm machinery does
20292 part of the job for us - we do not need to emit .globl/.lglobl and such. */
20293 if (!n->alias || n->weakref)
20294 return false;
20295
20296 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20297 return false;
20298
20299 /* Prevent assemble_alias from trying to use .set pseudo operation
20300 that does not behave as expected by the middle-end. */
20301 TREE_ASM_WRITTEN (n->decl) = true;
20302
20303 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20304 char *buffer = (char *) alloca (strlen (name) + 2);
20305 char *p;
20306 int dollar_inside = 0;
20307
20308 strcpy (buffer, name);
20309 p = strchr (buffer, '$');
20310 while (p) {
20311 *p = '_';
20312 dollar_inside++;
20313 p = strchr (p + 1, '$');
20314 }
20315 if (TREE_PUBLIC (n->decl))
20316 {
20317 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20318 {
20319 if (dollar_inside) {
20320 if (data->function_descriptor)
20321 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20322 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20323 }
20324 if (data->function_descriptor)
20325 {
20326 fputs ("\t.globl .", data->file);
20327 RS6000_OUTPUT_BASENAME (data->file, buffer);
20328 putc ('\n', data->file);
20329 }
20330 fputs ("\t.globl ", data->file);
20331 RS6000_OUTPUT_BASENAME (data->file, buffer);
20332 putc ('\n', data->file);
20333 }
20334 #ifdef ASM_WEAKEN_DECL
20335 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20336 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20337 #endif
20338 }
20339 else
20340 {
20341 if (dollar_inside)
20342 {
20343 if (data->function_descriptor)
20344 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20345 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20346 }
20347 if (data->function_descriptor)
20348 {
20349 fputs ("\t.lglobl .", data->file);
20350 RS6000_OUTPUT_BASENAME (data->file, buffer);
20351 putc ('\n', data->file);
20352 }
20353 fputs ("\t.lglobl ", data->file);
20354 RS6000_OUTPUT_BASENAME (data->file, buffer);
20355 putc ('\n', data->file);
20356 }
20357 if (data->function_descriptor)
20358 fputs (".", data->file);
20359 RS6000_OUTPUT_BASENAME (data->file, buffer);
20360 fputs (":\n", data->file);
20361 return false;
20362 }
20363
20364
20365 #ifdef HAVE_GAS_HIDDEN
20366 /* Helper function to calculate visibility of a DECL
20367 and return the value as a const string. */
20368
20369 static const char *
20370 rs6000_xcoff_visibility (tree decl)
20371 {
20372 static const char * const visibility_types[] = {
20373 "", ",protected", ",hidden", ",internal"
20374 };
20375
20376 enum symbol_visibility vis = DECL_VISIBILITY (decl);
20377 return visibility_types[vis];
20378 }
20379 #endif
20380
20381
20382 /* This macro produces the initial definition of a function name.
20383 On the RS/6000, we need to place an extra '.' in the function name and
20384 output the function descriptor.
20385 Dollar signs are converted to underscores.
20386
20387 The csect for the function will have already been created when
20388 text_section was selected. We do have to go back to that csect, however.
20389
20390 The third and fourth parameters to the .function pseudo-op
20391 are placeholders which no longer have any use.
20392
20393 Because the AIX assembler's .set command has unexpected semantics, we
20394 output all aliases as alternative labels in front of the definition. */
20395
20396 void
20397 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20398 {
20399 char *buffer = (char *) alloca (strlen (name) + 1);
20400 char *p;
20401 int dollar_inside = 0;
20402 struct declare_alias_data data = {file, false};
20403
20404 strcpy (buffer, name);
20405 p = strchr (buffer, '$');
20406 while (p) {
20407 *p = '_';
20408 dollar_inside++;
20409 p = strchr (p + 1, '$');
20410 }
20411 if (TREE_PUBLIC (decl))
20412 {
20413 if (!RS6000_WEAK || !DECL_WEAK (decl))
20414 {
20415 if (dollar_inside) {
20416 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20417 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20418 }
20419 fputs ("\t.globl .", file);
20420 RS6000_OUTPUT_BASENAME (file, buffer);
20421 #ifdef HAVE_GAS_HIDDEN
20422 fputs (rs6000_xcoff_visibility (decl), file);
20423 #endif
20424 putc ('\n', file);
20425 }
20426 }
20427 else
20428 {
20429 if (dollar_inside) {
20430 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20431 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20432 }
20433 fputs ("\t.lglobl .", file);
20434 RS6000_OUTPUT_BASENAME (file, buffer);
20435 putc ('\n', file);
20436 }
20437 fputs ("\t.csect ", file);
20438 RS6000_OUTPUT_BASENAME (file, buffer);
20439 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20440 RS6000_OUTPUT_BASENAME (file, buffer);
20441 fputs (":\n", file);
20442 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20443 &data, true);
20444 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20445 RS6000_OUTPUT_BASENAME (file, buffer);
20446 fputs (", TOC[tc0], 0\n", file);
20447 in_section = NULL;
20448 switch_to_section (function_section (decl));
20449 putc ('.', file);
20450 RS6000_OUTPUT_BASENAME (file, buffer);
20451 fputs (":\n", file);
20452 data.function_descriptor = true;
20453 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20454 &data, true);
20455 if (!DECL_IGNORED_P (decl))
20456 {
20457 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20458 xcoffout_declare_function (file, decl, buffer);
20459 else if (write_symbols == DWARF2_DEBUG)
20460 {
20461 name = (*targetm.strip_name_encoding) (name);
20462 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20463 }
20464 }
20465 return;
20466 }
20467
20468
20469 /* Output assembly language to globalize a symbol from a DECL,
20470 possibly with visibility. */
20471
20472 void
20473 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20474 {
20475 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20476 fputs (GLOBAL_ASM_OP, stream);
20477 RS6000_OUTPUT_BASENAME (stream, name);
20478 #ifdef HAVE_GAS_HIDDEN
20479 fputs (rs6000_xcoff_visibility (decl), stream);
20480 #endif
20481 putc ('\n', stream);
20482 }
20483
20484 /* Output assembly language to define a symbol as COMMON from a DECL,
20485 possibly with visibility. */
20486
20487 void
20488 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20489 tree decl ATTRIBUTE_UNUSED,
20490 const char *name,
20491 unsigned HOST_WIDE_INT size,
20492 unsigned HOST_WIDE_INT align)
20493 {
20494 unsigned HOST_WIDE_INT align2 = 2;
20495
20496 if (align > 32)
20497 align2 = floor_log2 (align / BITS_PER_UNIT);
20498 else if (size > 4)
20499 align2 = 3;
20500
20501 fputs (COMMON_ASM_OP, stream);
20502 RS6000_OUTPUT_BASENAME (stream, name);
20503
20504 fprintf (stream,
20505 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20506 size, align2);
20507
20508 #ifdef HAVE_GAS_HIDDEN
20509 if (decl != NULL)
20510 fputs (rs6000_xcoff_visibility (decl), stream);
20511 #endif
20512 putc ('\n', stream);
20513 }
20514
20515 /* This macro produces the initial definition of an object (variable) name.
20516 Because the AIX assembler's .set command has unexpected semantics, we
20517 output all aliases as alternative labels in front of the definition. */
20518
20519 void
20520 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20521 {
20522 struct declare_alias_data data = {file, false};
20523 RS6000_OUTPUT_BASENAME (file, name);
20524 fputs (":\n", file);
20525 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20526 &data, true);
20527 }
20528
20529 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
20530
20531 void
20532 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
20533 {
20534 fputs (integer_asm_op (size, FALSE), file);
20535 assemble_name (file, label);
20536 fputs ("-$", file);
20537 }
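/* E.g., for a 4-byte entry this emits something like
   "\t.long\tL..label-$", "$" being the AIX assembler's spelling of the
   current location counter.  */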
20538
20539 /* Output a symbol offset relative to the dbase for the current object.
20540 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20541 signed offsets.
20542
20543 __gcc_unwind_dbase is embedded in all executables/libraries through
20544 libgcc/config/rs6000/crtdbase.S. */
20545
20546 void
20547 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20548 {
20549 fputs (integer_asm_op (size, FALSE), file);
20550 assemble_name (file, label);
20551 fputs("-__gcc_unwind_dbase", file);
20552 }
20553
20554 #ifdef HAVE_AS_TLS
20555 static void
20556 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
20557 {
20558 rtx symbol;
20559 int flags;
20560 const char *symname;
20561
20562 default_encode_section_info (decl, rtl, first);
20563
20564 /* Careful not to prod global register variables. */
20565 if (!MEM_P (rtl))
20566 return;
20567 symbol = XEXP (rtl, 0);
20568 if (!SYMBOL_REF_P (symbol))
20569 return;
20570
20571 flags = SYMBOL_REF_FLAGS (symbol);
20572
20573 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20574 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
20575
20576 SYMBOL_REF_FLAGS (symbol) = flags;
20577
20578 /* Append mapping class to extern decls. */
20579 symname = XSTR (symbol, 0);
20580 if (decl /* sync condition with assemble_external () */
20581 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
20582 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
20583 || TREE_CODE (decl) == FUNCTION_DECL)
20584 && symname[strlen (symname) - 1] != ']')
20585 {
20586 char *newname = (char *) alloca (strlen (symname) + 5);
20587 strcpy (newname, symname);
20588 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
20589 ? "[DS]" : "[UA]"));
20590 XSTR (symbol, 0) = ggc_strdup (newname);
20591 }
20592 }
20593 #endif /* HAVE_AS_TLS */
20594 #endif /* TARGET_XCOFF */
20595
20596 void
20597 rs6000_asm_weaken_decl (FILE *stream, tree decl,
20598 const char *name, const char *val)
20599 {
20600 fputs ("\t.weak\t", stream);
20601 RS6000_OUTPUT_BASENAME (stream, name);
20602 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20603 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20604 {
20605 if (TARGET_XCOFF)
20606 fputs ("[DS]", stream);
20607 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20608 if (TARGET_XCOFF)
20609 fputs (rs6000_xcoff_visibility (decl), stream);
20610 #endif
20611 fputs ("\n\t.weak\t.", stream);
20612 RS6000_OUTPUT_BASENAME (stream, name);
20613 }
20614 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20615 if (TARGET_XCOFF)
20616 fputs (rs6000_xcoff_visibility (decl), stream);
20617 #endif
20618 fputc ('\n', stream);
20619 if (val)
20620 {
20621 #ifdef ASM_OUTPUT_DEF
20622 ASM_OUTPUT_DEF (stream, name, val);
20623 #endif
20624 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20625 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20626 {
20627 fputs ("\t.set\t.", stream);
20628 RS6000_OUTPUT_BASENAME (stream, name);
20629 fputs (",.", stream);
20630 RS6000_OUTPUT_BASENAME (stream, val);
20631 fputc ('\n', stream);
20632 }
20633 }
20634 }
20635
20636
20637 /* Return true if INSN should not be copied. */
20638
20639 static bool
20640 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20641 {
20642 return recog_memoized (insn) >= 0
20643 && get_attr_cannot_copy (insn);
20644 }
20645
20646 /* Compute a (partial) cost for rtx X. Return true if the complete
20647 cost has been computed, and false if subexpressions should be
20648 scanned. In either case, *TOTAL contains the cost result. */
20649
20650 static bool
20651 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
20652 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
20653 {
20654 int code = GET_CODE (x);
20655
20656 switch (code)
20657 {
20658 /* On the RS/6000, if it is valid in the insn, it is free. */
20659 case CONST_INT:
20660 if (((outer_code == SET
20661 || outer_code == PLUS
20662 || outer_code == MINUS)
20663 && (satisfies_constraint_I (x)
20664 || satisfies_constraint_L (x)))
20665 || (outer_code == AND
20666 && (satisfies_constraint_K (x)
20667 || (mode == SImode
20668 ? satisfies_constraint_L (x)
20669 : satisfies_constraint_J (x))))
20670 || ((outer_code == IOR || outer_code == XOR)
20671 && (satisfies_constraint_K (x)
20672 || (mode == SImode
20673 ? satisfies_constraint_L (x)
20674 : satisfies_constraint_J (x))))
20675 || outer_code == ASHIFT
20676 || outer_code == ASHIFTRT
20677 || outer_code == LSHIFTRT
20678 || outer_code == ROTATE
20679 || outer_code == ROTATERT
20680 || outer_code == ZERO_EXTRACT
20681 || (outer_code == MULT
20682 && satisfies_constraint_I (x))
20683 || ((outer_code == DIV || outer_code == UDIV
20684 || outer_code == MOD || outer_code == UMOD)
20685 && exact_log2 (INTVAL (x)) >= 0)
20686 || (outer_code == COMPARE
20687 && (satisfies_constraint_I (x)
20688 || satisfies_constraint_K (x)))
20689 || ((outer_code == EQ || outer_code == NE)
20690 && (satisfies_constraint_I (x)
20691 || satisfies_constraint_K (x)
20692 || (mode == SImode
20693 ? satisfies_constraint_L (x)
20694 : satisfies_constraint_J (x))))
20695 || (outer_code == GTU
20696 && satisfies_constraint_I (x))
20697 || (outer_code == LTU
20698 && satisfies_constraint_P (x)))
20699 {
20700 *total = 0;
20701 return true;
20702 }
20703 else if ((outer_code == PLUS
20704 && reg_or_add_cint_operand (x, VOIDmode))
20705 || (outer_code == MINUS
20706 && reg_or_sub_cint_operand (x, VOIDmode))
20707 || ((outer_code == SET
20708 || outer_code == IOR
20709 || outer_code == XOR)
20710 && (INTVAL (x)
20711 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
20712 {
20713 *total = COSTS_N_INSNS (1);
20714 return true;
20715 }
20716 /* FALLTHRU */
20717
20718 case CONST_DOUBLE:
20719 case CONST_WIDE_INT:
20720 case CONST:
20721 case HIGH:
20722 case SYMBOL_REF:
20723 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20724 return true;
20725
20726 case MEM:
20727 /* When optimizing for size, a MEM should be slightly more expensive
20728 than generating its address, e.g., (plus (reg) (const)).
20729 L1 cache latency is about two instructions. */
20730 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20731 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
20732 *total += COSTS_N_INSNS (100);
20733 return true;
20734
20735 case LABEL_REF:
20736 *total = 0;
20737 return true;
20738
20739 case PLUS:
20740 case MINUS:
20741 if (FLOAT_MODE_P (mode))
20742 *total = rs6000_cost->fp;
20743 else
20744 *total = COSTS_N_INSNS (1);
20745 return false;
20746
20747 case MULT:
20748 if (CONST_INT_P (XEXP (x, 1))
20749 && satisfies_constraint_I (XEXP (x, 1)))
20750 {
20751 if (INTVAL (XEXP (x, 1)) >= -256
20752 && INTVAL (XEXP (x, 1)) <= 255)
20753 *total = rs6000_cost->mulsi_const9;
20754 else
20755 *total = rs6000_cost->mulsi_const;
20756 }
20757 else if (mode == SFmode)
20758 *total = rs6000_cost->fp;
20759 else if (FLOAT_MODE_P (mode))
20760 *total = rs6000_cost->dmul;
20761 else if (mode == DImode)
20762 *total = rs6000_cost->muldi;
20763 else
20764 *total = rs6000_cost->mulsi;
20765 return false;
20766
20767 case FMA:
20768 if (mode == SFmode)
20769 *total = rs6000_cost->fp;
20770 else
20771 *total = rs6000_cost->dmul;
20772 break;
20773
20774 case DIV:
20775 case MOD:
20776 if (FLOAT_MODE_P (mode))
20777 {
20778 *total = mode == DFmode ? rs6000_cost->ddiv
20779 : rs6000_cost->sdiv;
20780 return false;
20781 }
20782 /* FALLTHRU */
20783
20784 case UDIV:
20785 case UMOD:
20786 if (CONST_INT_P (XEXP (x, 1))
20787 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
20788 {
20789 if (code == DIV || code == MOD)
20790 /* Shift, addze */
20791 *total = COSTS_N_INSNS (2);
20792 else
20793 /* Shift */
20794 *total = COSTS_N_INSNS (1);
20795 }
20796 else
20797 {
20798 if (GET_MODE (XEXP (x, 1)) == DImode)
20799 *total = rs6000_cost->divdi;
20800 else
20801 *total = rs6000_cost->divsi;
20802 }
20803 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20804 if (!TARGET_MODULO && (code == MOD || code == UMOD))
20805 *total += COSTS_N_INSNS (2);
20806 return false;
20807
20808 case CTZ:
20809 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
20810 return false;
20811
20812 case FFS:
20813 *total = COSTS_N_INSNS (4);
20814 return false;
20815
20816 case POPCOUNT:
20817 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
20818 return false;
20819
20820 case PARITY:
20821 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
20822 return false;
20823
20824 case NOT:
20825 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
20826 *total = 0;
20827 else
20828 *total = COSTS_N_INSNS (1);
20829 return false;
20830
20831 case AND:
20832 if (CONST_INT_P (XEXP (x, 1)))
20833 {
20834 rtx left = XEXP (x, 0);
20835 rtx_code left_code = GET_CODE (left);
20836
20837 /* rotate-and-mask: 1 insn. */
20838 if ((left_code == ROTATE
20839 || left_code == ASHIFT
20840 || left_code == LSHIFTRT)
20841 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
20842 {
20843 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
20844 if (!CONST_INT_P (XEXP (left, 1)))
20845 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
20846 *total += COSTS_N_INSNS (1);
20847 return true;
20848 }
20849
20850 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20851 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
20852 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
20853 || (val & 0xffff) == val
20854 || (val & 0xffff0000) == val
20855 || ((val & 0xffff) == 0 && mode == SImode))
20856 {
20857 *total = rtx_cost (left, mode, AND, 0, speed);
20858 *total += COSTS_N_INSNS (1);
20859 return true;
20860 }
20861
20862 /* 2 insns. */
20863 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
20864 {
20865 *total = rtx_cost (left, mode, AND, 0, speed);
20866 *total += COSTS_N_INSNS (2);
20867 return true;
20868 }
20869 }
20870
20871 *total = COSTS_N_INSNS (1);
20872 return false;
20873
20874 case IOR:
20875 /* FIXME */
20876 *total = COSTS_N_INSNS (1);
20877 return true;
20878
20879 case CLZ:
20880 case XOR:
20881 case ZERO_EXTRACT:
20882 *total = COSTS_N_INSNS (1);
20883 return false;
20884
20885 case ASHIFT:
20886 /* EXTSWSLI is a combined instruction; don't count the sign extend
20887 and the shift separately within the insn. */
20888 if (TARGET_EXTSWSLI && mode == DImode
20889 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
20890 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
20891 {
20892 *total = 0;
20893 return false;
20894 }
20895 /* fall through */
20896
20897 case ASHIFTRT:
20898 case LSHIFTRT:
20899 case ROTATE:
20900 case ROTATERT:
20901 /* Handle mul_highpart. */
20902 if (outer_code == TRUNCATE
20903 && GET_CODE (XEXP (x, 0)) == MULT)
20904 {
20905 if (mode == DImode)
20906 *total = rs6000_cost->muldi;
20907 else
20908 *total = rs6000_cost->mulsi;
20909 return true;
20910 }
20911 else if (outer_code == AND)
20912 *total = 0;
20913 else
20914 *total = COSTS_N_INSNS (1);
20915 return false;
20916
20917 case SIGN_EXTEND:
20918 case ZERO_EXTEND:
20919 if (MEM_P (XEXP (x, 0)))
20920 *total = 0;
20921 else
20922 *total = COSTS_N_INSNS (1);
20923 return false;
20924
20925 case COMPARE:
20926 case NEG:
20927 case ABS:
20928 if (!FLOAT_MODE_P (mode))
20929 {
20930 *total = COSTS_N_INSNS (1);
20931 return false;
20932 }
20933 /* FALLTHRU */
20934
20935 case FLOAT:
20936 case UNSIGNED_FLOAT:
20937 case FIX:
20938 case UNSIGNED_FIX:
20939 case FLOAT_TRUNCATE:
20940 *total = rs6000_cost->fp;
20941 return false;
20942
20943 case FLOAT_EXTEND:
20944 if (mode == DFmode)
20945 *total = rs6000_cost->sfdf_convert;
20946 else
20947 *total = rs6000_cost->fp;
20948 return false;
20949
20950 case CALL:
20951 case IF_THEN_ELSE:
20952 if (!speed)
20953 {
20954 *total = COSTS_N_INSNS (1);
20955 return true;
20956 }
20957 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
20958 {
20959 *total = rs6000_cost->fp;
20960 return false;
20961 }
20962 break;
20963
20964 case NE:
20965 case EQ:
20966 case GTU:
20967 case LTU:
20968 /* Carry bit requires mode == Pmode.
20969 A NEG or PLUS has already been counted, so only add one. */
20970 if (mode == Pmode
20971 && (outer_code == NEG || outer_code == PLUS))
20972 {
20973 *total = COSTS_N_INSNS (1);
20974 return true;
20975 }
20976 /* FALLTHRU */
20977
20978 case GT:
20979 case LT:
20980 case UNORDERED:
20981 if (outer_code == SET)
20982 {
20983 if (XEXP (x, 1) == const0_rtx)
20984 {
20985 *total = COSTS_N_INSNS (2);
20986 return true;
20987 }
20988 else
20989 {
20990 *total = COSTS_N_INSNS (3);
20991 return false;
20992 }
20993 }
20994 /* CC COMPARE. */
20995 if (outer_code == COMPARE)
20996 {
20997 *total = 0;
20998 return true;
20999 }
21000 break;
21001
21002 default:
21003 break;
21004 }
21005
21006 return false;
21007 }
21008
21009 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21010
21011 static bool
21012 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21013 int opno, int *total, bool speed)
21014 {
21015 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21016
21017 fprintf (stderr,
21018 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21019 "opno = %d, total = %d, speed = %s, x:\n",
21020 ret ? "complete" : "scan inner",
21021 GET_MODE_NAME (mode),
21022 GET_RTX_NAME (outer_code),
21023 opno,
21024 *total,
21025 speed ? "true" : "false");
21026
21027 debug_rtx (x);
21028
21029 return ret;
21030 }
21031
21032 static int
21033 rs6000_insn_cost (rtx_insn *insn, bool speed)
21034 {
21035 if (recog_memoized (insn) < 0)
21036 return 0;
21037
21038 /* If we are optimizing for size, just use the length. */
21039 if (!speed)
21040 return get_attr_length (insn);
21041
21042 /* Use the cost if provided. */
21043 int cost = get_attr_cost (insn);
21044 if (cost > 0)
21045 return cost;
21046
21047 /* If the insn tells us how many insns there are, use that. Otherwise use
21048 the length / 4, first adjusting the length to remove the extra size
21049 that prefixed instructions take. */
21050 int n = get_attr_num_insns (insn);
21051 if (n == 0)
21052 {
21053 int length = get_attr_length (insn);
21054 if (get_attr_prefixed (insn) == PREFIXED_YES)
21055 {
21056 int adjust = 0;
21057 ADJUST_INSN_LENGTH (insn, adjust);
21058 length -= adjust;
21059 }
21060
21061 n = length / 4;
21062 }
21063
21064 enum attr_type type = get_attr_type (insn);
21065
21066 switch (type)
21067 {
21068 case TYPE_LOAD:
21069 case TYPE_FPLOAD:
21070 case TYPE_VECLOAD:
21071 cost = COSTS_N_INSNS (n + 1);
21072 break;
21073
21074 case TYPE_MUL:
21075 switch (get_attr_size (insn))
21076 {
21077 case SIZE_8:
21078 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21079 break;
21080 case SIZE_16:
21081 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21082 break;
21083 case SIZE_32:
21084 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21085 break;
21086 case SIZE_64:
21087 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21088 break;
21089 default:
21090 gcc_unreachable ();
21091 }
21092 break;
21093 case TYPE_DIV:
21094 switch (get_attr_size (insn))
21095 {
21096 case SIZE_32:
21097 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21098 break;
21099 case SIZE_64:
21100 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21101 break;
21102 default:
21103 gcc_unreachable ();
21104 }
21105 break;
21106
21107 case TYPE_FP:
21108 cost = n * rs6000_cost->fp;
21109 break;
21110 case TYPE_DMUL:
21111 cost = n * rs6000_cost->dmul;
21112 break;
21113 case TYPE_SDIV:
21114 cost = n * rs6000_cost->sdiv;
21115 break;
21116 case TYPE_DDIV:
21117 cost = n * rs6000_cost->ddiv;
21118 break;
21119
21120 case TYPE_SYNC:
21121 case TYPE_LOAD_L:
21122 case TYPE_MFCR:
21123 case TYPE_MFCRF:
21124 cost = COSTS_N_INSNS (n + 2);
21125 break;
21126
21127 default:
21128 cost = COSTS_N_INSNS (n);
21129 }
21130
21131 return cost;
21132 }
21133
21134 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21135
21136 static int
21137 rs6000_debug_address_cost (rtx x, machine_mode mode,
21138 addr_space_t as, bool speed)
21139 {
21140 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21141
21142 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21143 ret, speed ? "true" : "false");
21144 debug_rtx (x);
21145
21146 return ret;
21147 }
21148
21149
21150 /* A C expression returning the cost of moving data from a register of class
21151 CLASS1 to one of CLASS2. */
21152
21153 static int
21154 rs6000_register_move_cost (machine_mode mode,
21155 reg_class_t from, reg_class_t to)
21156 {
21157 int ret;
21158 reg_class_t rclass;
21159
21160 if (TARGET_DEBUG_COST)
21161 dbg_cost_ctrl++;
21162
21163 /* If we have VSX, we can easily move between FPR or Altivec registers,
21164 otherwise we can only easily move within classes.
21165 Do this first so we give best-case answers for union classes
21166 containing both gprs and vsx regs. */
21167 HARD_REG_SET to_vsx, from_vsx;
21168 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21169 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21170 if (!hard_reg_set_empty_p (to_vsx)
21171 && !hard_reg_set_empty_p (from_vsx)
21172 && (TARGET_VSX
21173 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21174 {
21175 int reg = FIRST_FPR_REGNO;
21176 if (TARGET_VSX
21177 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21178 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21179 reg = FIRST_ALTIVEC_REGNO;
21180 ret = 2 * hard_regno_nregs (reg, mode);
21181 }
21182
21183 /* Moves from/to GENERAL_REGS. */
21184 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21185 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21186 {
21187 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21188 {
21189 if (TARGET_DIRECT_MOVE)
21190 {
21191 /* Keep the cost for direct moves above that for within
21192 a register class even if the actual processor cost is
21193 comparable. We do this because a direct move insn
21194 can't be a nop, whereas with ideal register
21195 allocation a move within the same class might turn
21196 out to be a nop. */
21197 if (rs6000_tune == PROCESSOR_POWER9
21198 || rs6000_tune == PROCESSOR_FUTURE)
21199 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21200 else
21201 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21202 /* SFmode requires a conversion when moving between gprs
21203 and vsx. */
21204 if (mode == SFmode)
21205 ret += 2;
21206 }
21207 else
21208 ret = (rs6000_memory_move_cost (mode, rclass, false)
21209 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21210 }
21211
21212 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21213 shift. */
21214 else if (rclass == CR_REGS)
21215 ret = 4;
21216
21217 /* For those processors that have slow LR/CTR moves, make them more
21218 expensive than memory in order to bias spills to memory. */
21219 else if ((rs6000_tune == PROCESSOR_POWER6
21220 || rs6000_tune == PROCESSOR_POWER7
21221 || rs6000_tune == PROCESSOR_POWER8
21222 || rs6000_tune == PROCESSOR_POWER9)
21223 && reg_class_subset_p (rclass, SPECIAL_REGS))
21224 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21225
21226 else
21227 /* A move will cost one instruction per GPR moved. */
21228 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21229 }
21230
21231 /* Everything else has to go through GENERAL_REGS. */
21232 else
21233 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21234 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21235
21236 if (TARGET_DEBUG_COST)
21237 {
21238 if (dbg_cost_ctrl == 1)
21239 fprintf (stderr,
21240 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21241 ret, GET_MODE_NAME (mode), reg_class_names[from],
21242 reg_class_names[to]);
21243 dbg_cost_ctrl--;
21244 }
21245
21246 return ret;
21247 }
21248
21249 /* A C expression returning the cost of moving data of MODE from a register to
21250 or from memory. */
21251
21252 static int
21253 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21254 bool in ATTRIBUTE_UNUSED)
21255 {
21256 int ret;
21257
21258 if (TARGET_DEBUG_COST)
21259 dbg_cost_ctrl++;
21260
21261 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21262 ret = 4 * hard_regno_nregs (0, mode);	/* GPR0.  */
21263 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21264 || reg_classes_intersect_p (rclass, VSX_REGS)))
21265 ret = 4 * hard_regno_nregs (32, mode);	/* FPR0.  */
21266 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21267 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21268 else
21269 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21270
21271 if (TARGET_DEBUG_COST)
21272 {
21273 if (dbg_cost_ctrl == 1)
21274 fprintf (stderr,
21275 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21276 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21277 dbg_cost_ctrl--;
21278 }
21279
21280 return ret;
21281 }
21282
21283 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21284
21285 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21286 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21287 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21288 move cost between GENERAL_REGS and VSX_REGS low.
21289
21290 It might seem reasonable to use a union class. After all, if usage
21291 of VSRs is low and of GPRs high, it might make sense to spill GPRs to VSRs
21292 rather than memory. However, in cases where register pressure of
21293 both is high, like the cactus_adm spec test, allowing
21294 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21295 the first scheduling pass. This is partly due to an allocno of
21296 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21297 class, which gives too high a pressure for GENERAL_REGS and too low
21298 for VSX_REGS. So, force a choice of the subclass here.
21299
21300 The best class is also the union if GENERAL_REGS and VSX_REGS have
21301 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21302 allocno class, since trying to narrow down the class by regno mode
21303 is prone to error. For example, SImode is allowed in VSX regs and
21304 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21305 it would be wrong to choose an allocno of GENERAL_REGS based on
21306 SImode. */
21307
21308 static reg_class_t
21309 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21310 reg_class_t allocno_class,
21311 reg_class_t best_class)
21312 {
21313 switch (allocno_class)
21314 {
21315 case GEN_OR_VSX_REGS:
21316 /* best_class must be a subset of allocno_class. */
21317 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21318 || best_class == GEN_OR_FLOAT_REGS
21319 || best_class == VSX_REGS
21320 || best_class == ALTIVEC_REGS
21321 || best_class == FLOAT_REGS
21322 || best_class == GENERAL_REGS
21323 || best_class == BASE_REGS);
21324 /* Use best_class but choose wider classes when copying from the
21325 wider class to best_class is cheap. This mimics IRA choice
21326 of allocno class. */
21327 if (best_class == BASE_REGS)
21328 return GENERAL_REGS;
21329 if (TARGET_VSX
21330 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21331 return VSX_REGS;
21332 return best_class;
21333
21334 default:
21335 break;
21336 }
21337
21338 return allocno_class;
21339 }
21340
21341 /* Returns a code for a target-specific builtin that implements
21342 the reciprocal of the function, or NULL_TREE if not available. */
21343
21344 static tree
21345 rs6000_builtin_reciprocal (tree fndecl)
21346 {
21347 switch (DECL_MD_FUNCTION_CODE (fndecl))
21348 {
21349 case VSX_BUILTIN_XVSQRTDP:
21350 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21351 return NULL_TREE;
21352
21353 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21354
21355 case VSX_BUILTIN_XVSQRTSP:
21356 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21357 return NULL_TREE;
21358
21359 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21360
21361 default:
21362 return NULL_TREE;
21363 }
21364 }
21365
21366 /* Load up a constant. If the mode is a vector mode, splat the value across
21367 all of the vector elements. */
21368
21369 static rtx
21370 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21371 {
21372 rtx reg;
21373
21374 if (mode == SFmode || mode == DFmode)
21375 {
21376 rtx d = const_double_from_real_value (dconst, mode);
21377 reg = force_reg (mode, d);
21378 }
21379 else if (mode == V4SFmode)
21380 {
21381 rtx d = const_double_from_real_value (dconst, SFmode);
21382 rtvec v = gen_rtvec (4, d, d, d, d);
21383 reg = gen_reg_rtx (mode);
21384 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21385 }
21386 else if (mode == V2DFmode)
21387 {
21388 rtx d = const_double_from_real_value (dconst, DFmode);
21389 rtvec v = gen_rtvec (2, d, d);
21390 reg = gen_reg_rtx (mode);
21391 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21392 }
21393 else
21394 gcc_unreachable ();
21395
21396 return reg;
21397 }
21398
21399 /* Generate an FMA instruction. */
21400
21401 static void
21402 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21403 {
21404 machine_mode mode = GET_MODE (target);
21405 rtx dst;
21406
21407 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21408 gcc_assert (dst != NULL);
21409
21410 if (dst != target)
21411 emit_move_insn (target, dst);
21412 }
21413
21414 /* Generate an FNMSUB instruction: dst = -fma(m1, m2, -a). */
21415
21416 static void
21417 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21418 {
21419 machine_mode mode = GET_MODE (dst);
21420 rtx r;
21421
21422 /* This is a tad more complicated, since the fnma_optab is for
21423 a different expression: fma(-m1, m2, a), which is the same
21424 thing except in the case of signed zeros.
21425
21426 Fortunately we know that if FMA is supported that FNMSUB is
21427 also supported in the ISA. Just expand it directly. */
21428
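  /* The two forms differ exactly when m1*m2 == a: fma(-m1, m2, a)
     rounds a - m1*m2 to +0.0, while FNMSUB's -fma(m1, m2, -a)
     negates that to -0.0.  */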
21429 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21430
21431 r = gen_rtx_NEG (mode, a);
21432 r = gen_rtx_FMA (mode, m1, m2, r);
21433 r = gen_rtx_NEG (mode, r);
21434 emit_insn (gen_rtx_SET (dst, r));
21435 }
21436
21437 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21438 add a reg_note saying that this was a division. Support both scalar and
21439 vector divide. Assumes no trapping math and finite arguments. */
21440
21441 void
21442 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21443 {
21444 machine_mode mode = GET_MODE (dst);
21445 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21446 int i;
21447
21448 /* Low precision estimates guarantee 5 bits of accuracy. High
21449 precision estimates guarantee 14 bits of accuracy. SFmode
21450 requires 23 bits of accuracy. DFmode requires 52 bits of
21451 accuracy. Each pass at least doubles the accuracy, leading
21452 to the following. */
21453 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21454 if (mode == DFmode || mode == V2DFmode)
21455 passes++;
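  /* I.e. a 5-bit estimate at least doubles each pass: 5 -> 10 -> 20
     -> 40 bits, so 3 passes cover SFmode's 23 bits and a 4th covers
     DFmode's 52; a 14-bit estimate needs only 1 pass (28 bits) and
     2 passes (56 bits) respectively.  */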
21456
21457 enum insn_code code = optab_handler (smul_optab, mode);
21458 insn_gen_fn gen_mul = GEN_FCN (code);
21459
21460 gcc_assert (code != CODE_FOR_nothing);
21461
21462 one = rs6000_load_constant_and_splat (mode, dconst1);
21463
21464 /* x0 = 1./d estimate */
21465 x0 = gen_reg_rtx (mode);
21466 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21467 UNSPEC_FRES)));
21468
21469 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
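  /* Note that x * (2 - d*x) = x + x*(1 - d*x), so each step below is
     one fnmsub (e = 1 - d*x) followed by one fmadd (x' = x + e*x).  */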
21470 if (passes > 1) {
21471
21472 /* e0 = 1. - d * x0 */
21473 e0 = gen_reg_rtx (mode);
21474 rs6000_emit_nmsub (e0, d, x0, one);
21475
21476 /* x1 = x0 + e0 * x0 */
21477 x1 = gen_reg_rtx (mode);
21478 rs6000_emit_madd (x1, e0, x0, x0);
21479
21480 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21481 ++i, xprev = xnext, eprev = enext) {
21482
21483 /* enext = eprev * eprev */
21484 enext = gen_reg_rtx (mode);
21485 emit_insn (gen_mul (enext, eprev, eprev));
21486
21487 /* xnext = xprev + enext * xprev */
21488 xnext = gen_reg_rtx (mode);
21489 rs6000_emit_madd (xnext, enext, xprev, xprev);
21490 }
21491
21492 } else
21493 xprev = x0;
21494
21495 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21496
21497 /* u = n * xprev */
21498 u = gen_reg_rtx (mode);
21499 emit_insn (gen_mul (u, n, xprev));
21500
21501 /* v = n - (d * u) */
21502 v = gen_reg_rtx (mode);
21503 rs6000_emit_nmsub (v, d, u, n);
21504
21505 /* dst = (v * xprev) + u */
21506 rs6000_emit_madd (dst, v, xprev, u);
21507
21508 if (note_p)
21509 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
21510 }
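/* A scalar sketch of the sequence above, for illustration only;
   fres() here stands in for the hardware's ~5-bit reciprocal
   estimate, and a single refinement pass is shown:

       float swdiv_sketch (float n, float d)
       {
	 float x = fres (d);		// x0 ~= 1/d estimate
	 float e = 1.0f - d * x;	// fnmsub
	 x = x + e * x;			// fmadd: refined x1
	 float u = n * x;		// u = n * x
	 float v = n - d * u;		// fnmsub
	 return v * x + u;		// fmadd: final quotient
       }  */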
21511
21512 /* Goldschmidt's Algorithm for single/double-precision floating point
21513 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
21514
21515 void
21516 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
21517 {
21518 machine_mode mode = GET_MODE (src);
21519 rtx e = gen_reg_rtx (mode);
21520 rtx g = gen_reg_rtx (mode);
21521 rtx h = gen_reg_rtx (mode);
21522
21523 /* Low precision estimates guarantee 5 bits of accuracy. High
21524 precision estimates guarantee 14 bits of accuracy. SFmode
21525 requires 23 bits of accuracy. DFmode requires 52 bits of
21526 accuracy. Each pass at least doubles the accuracy, leading
21527 to the following. */
21528 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21529 if (mode == DFmode || mode == V2DFmode)
21530 passes++;
21531
21532 int i;
21533 rtx mhalf;
21534 enum insn_code code = optab_handler (smul_optab, mode);
21535 insn_gen_fn gen_mul = GEN_FCN (code);
21536
21537 gcc_assert (code != CODE_FOR_nothing);
21538
21539 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
21540
21541 /* e = rsqrt estimate */
21542 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
21543 UNSPEC_RSQRT)));
21544
21545 /* If (src == 0.0), zero the rsqrt estimate E so its infinity cannot turn sqrt(0.0) into a NaN. */
21546 if (!recip)
21547 {
21548 rtx zero = force_reg (mode, CONST0_RTX (mode));
21549
21550 if (mode == SFmode)
21551 {
21552 rtx target = emit_conditional_move (e, GT, src, zero, mode,
21553 e, zero, mode, 0);
21554 if (target != e)
21555 emit_move_insn (e, target);
21556 }
21557 else
21558 {
21559 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
21560 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
21561 }
21562 }
21563
21564 /* g = sqrt estimate. */
21565 emit_insn (gen_mul (g, e, src));
21566 /* h = 1/(2*sqrt) estimate. */
21567 emit_insn (gen_mul (h, e, mhalf));
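  /* Goldschmidt iteration: maintain g ~= sqrt(src) and
     h ~= 1/(2*sqrt(src)).  Each pass computes the residual
     t = 1/2 - g*h and refines g' = g + t*g, h' = h + t*h;
     at convergence g*h == 1/2.  */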
21568
21569 if (recip)
21570 {
21571 if (passes == 1)
21572 {
21573 rtx t = gen_reg_rtx (mode);
21574 rs6000_emit_nmsub (t, g, h, mhalf);
21575 /* Apply the correction directly to the rsqrt estimate E. */
21576 rs6000_emit_madd (dst, e, t, e);
21577 }
21578 else
21579 {
21580 for (i = 0; i < passes; i++)
21581 {
21582 rtx t1 = gen_reg_rtx (mode);
21583 rtx g1 = gen_reg_rtx (mode);
21584 rtx h1 = gen_reg_rtx (mode);
21585
21586 rs6000_emit_nmsub (t1, g, h, mhalf);
21587 rs6000_emit_madd (g1, g, t1, g);
21588 rs6000_emit_madd (h1, h, t1, h);
21589
21590 g = g1;
21591 h = h1;
21592 }
21593 /* Multiply H by 2 to form the rsqrt result, 1/sqrt(src). */
21594 emit_insn (gen_add3_insn (dst, h, h));
21595 }
21596 }
21597 else
21598 {
21599 rtx t = gen_reg_rtx (mode);
21600 rs6000_emit_nmsub (t, g, h, mhalf);
21601 rs6000_emit_madd (dst, g, t, g);
21602 }
21603
21604 return;
21605 }
21606
21607 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21608 (Power7) targets. DST is the target, and SRC is the argument operand. */
21609
21610 void
21611 rs6000_emit_popcount (rtx dst, rtx src)
21612 {
21613 machine_mode mode = GET_MODE (dst);
21614 rtx tmp1, tmp2;
21615
21616 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21617 if (TARGET_POPCNTD)
21618 {
21619 if (mode == SImode)
21620 emit_insn (gen_popcntdsi2 (dst, src));
21621 else
21622 emit_insn (gen_popcntddi2 (dst, src));
21623 return;
21624 }
21625
21626 tmp1 = gen_reg_rtx (mode);
21627
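  /* popcntb leaves each byte holding the bit count of that byte;
     multiplying by 0x01010101 (or its 64-bit analogue) accumulates
     all the byte counts into the most significant byte, which the
     final right shift extracts.  */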
21628 if (mode == SImode)
21629 {
21630 emit_insn (gen_popcntbsi2 (tmp1, src));
21631 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
21632 NULL_RTX, 0);
21633 tmp2 = force_reg (SImode, tmp2);
21634 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
21635 }
21636 else
21637 {
21638 emit_insn (gen_popcntbdi2 (tmp1, src));
21639 tmp2 = expand_mult (DImode, tmp1,
21640 GEN_INT ((HOST_WIDE_INT)
21641 0x01010101 << 32 | 0x01010101),
21642 NULL_RTX, 0);
21643 tmp2 = force_reg (DImode, tmp2);
21644 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
21645 }
21646 }
21647
21648
21649 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
21650 target, and SRC is the argument operand. */
21651
21652 void
21653 rs6000_emit_parity (rtx dst, rtx src)
21654 {
21655 machine_mode mode = GET_MODE (dst);
21656 rtx tmp;
21657
21658 tmp = gen_reg_rtx (mode);
21659
21660 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
21661 if (TARGET_CMPB)
21662 {
21663 if (mode == SImode)
21664 {
21665 emit_insn (gen_popcntbsi2 (tmp, src));
21666 emit_insn (gen_paritysi2_cmpb (dst, tmp));
21667 }
21668 else
21669 {
21670 emit_insn (gen_popcntbdi2 (tmp, src));
21671 emit_insn (gen_paritydi2_cmpb (dst, tmp));
21672 }
21673 return;
21674 }
21675
21676 if (mode == SImode)
21677 {
21678 /* Is mult+shift >= shift+xor+shift+xor? */
21679 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
21680 {
21681 rtx tmp1, tmp2, tmp3, tmp4;
21682
21683 tmp1 = gen_reg_rtx (SImode);
21684 emit_insn (gen_popcntbsi2 (tmp1, src));
21685
21686 tmp2 = gen_reg_rtx (SImode);
21687 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
21688 tmp3 = gen_reg_rtx (SImode);
21689 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
21690
21691 tmp4 = gen_reg_rtx (SImode);
21692 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
21693 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
21694 }
21695 else
21696 rs6000_emit_popcount (tmp, src);
21697 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
21698 }
21699 else
21700 {
21701 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
21702 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
21703 {
21704 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
21705
21706 tmp1 = gen_reg_rtx (DImode);
21707 emit_insn (gen_popcntbdi2 (tmp1, src));
21708
21709 tmp2 = gen_reg_rtx (DImode);
21710 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
21711 tmp3 = gen_reg_rtx (DImode);
21712 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
21713
21714 tmp4 = gen_reg_rtx (DImode);
21715 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
21716 tmp5 = gen_reg_rtx (DImode);
21717 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
21718
21719 tmp6 = gen_reg_rtx (DImode);
21720 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
21721 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
21722 }
21723 else
21724 rs6000_emit_popcount (tmp, src);
21725 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
21726 }
21727 }
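
/* A minimal model (hypothetical, not part of GCC) of the shift+xor
   fallback above: XOR-folding the halves of a word preserves parity, so
   after folding the popcntb result down to its low byte, ANDing with 1
   yields the parity of the whole word.  */

static unsigned int
parity_model_si (unsigned int per_byte_counts)
{
  unsigned int x = per_byte_counts;
  x ^= x >> 16;
  x ^= x >> 8;
  return x & 1;
}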
21728
21729 /* Expand an Altivec constant permutation for little endian mode.
21730 OP0 and OP1 are the input vectors and TARGET is the output vector.
21731 SEL specifies the constant permutation vector.
21732
21733 There are two issues: First, the two input operands must be
21734 swapped so that together they form a double-wide array in LE
21735 order. Second, the vperm instruction has surprising behavior
21736 in LE mode: it interprets the elements of the source vectors
21737 in BE mode ("left to right") and interprets the elements of
21738 the destination vector in LE mode ("right to left"). To
21739 correct for this, we must subtract each element of the permute
21740 control vector from 31.
21741
21742 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21743 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21744 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21745 serve as the permute control vector. Then, in BE mode,
21746
21747 vperm 9,10,11,12
21748
21749 places the desired result in vr9. However, in LE mode the
21750 vector contents will be
21751
21752 vr10 = 00000003 00000002 00000001 00000000
21753 vr11 = 00000007 00000006 00000005 00000004
21754
21755 The result of the vperm using the same permute control vector is
21756
21757 vr9 = 05000000 07000000 01000000 03000000
21758
21759 That is, the leftmost 4 bytes of vr10 are interpreted as the
21760 source for the rightmost 4 bytes of vr9, and so on.
21761
21762 If we change the permute control vector to
21763
21764 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21765
21766 and issue
21767
21768 vperm 9,11,10,12
21769
21770 we get the desired
21771
21772 vr9 = 00000006 00000004 00000002 00000000. */
21773
21774 static void
21775 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
21776 const vec_perm_indices &sel)
21777 {
21778 unsigned int i;
21779 rtx perm[16];
21780 rtx constv, unspec;
21781
21782 /* Unpack and adjust the constant selector. */
21783 for (i = 0; i < 16; ++i)
21784 {
21785 unsigned int elt = 31 - (sel[i] & 31);
21786 perm[i] = GEN_INT (elt);
21787 }
21788
21789 /* Expand to a permute, swapping the inputs and using the
21790 adjusted selector. */
21791 if (!REG_P (op0))
21792 op0 = force_reg (V16QImode, op0);
21793 if (!REG_P (op1))
21794 op1 = force_reg (V16QImode, op1);
21795
21796 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
21797 constv = force_reg (V16QImode, constv);
21798 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
21799 UNSPEC_VPERM);
21800 if (!REG_P (target))
21801 {
21802 rtx tmp = gen_reg_rtx (V16QImode);
21803 emit_move_insn (tmp, unspec);
21804 unspec = tmp;
21805 }
21806
21807 emit_move_insn (target, unspec);
21808 }
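
/* A minimal model (hypothetical, not part of GCC) of the adjustment
   performed above: each BE-numbered selector element becomes 31 minus
   itself, and the inputs are swapped, so the same vperm instruction
   produces the LE-numbered result.  */

static void
le_adjust_const_sel_model (unsigned char perm[16],
			   const unsigned char sel[16])
{
  for (int i = 0; i < 16; i++)
    perm[i] = 31 - (sel[i] & 31);
}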
21809
21810 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21811 permute control vector. But here it's not a constant, so we must
21812 generate a vector NAND or NOR to do the adjustment. */
21813
21814 void
21815 altivec_expand_vec_perm_le (rtx operands[4])
21816 {
21817 rtx notx, iorx, unspec;
21818 rtx target = operands[0];
21819 rtx op0 = operands[1];
21820 rtx op1 = operands[2];
21821 rtx sel = operands[3];
21822 rtx tmp = target;
21823 rtx norreg = gen_reg_rtx (V16QImode);
21824 machine_mode mode = GET_MODE (target);
21825
21826 /* Get everything in regs so the pattern matches. */
21827 if (!REG_P (op0))
21828 op0 = force_reg (mode, op0);
21829 if (!REG_P (op1))
21830 op1 = force_reg (mode, op1);
21831 if (!REG_P (sel))
21832 sel = force_reg (V16QImode, sel);
21833 if (!REG_P (target))
21834 tmp = gen_reg_rtx (mode);
21835
21836 if (TARGET_P9_VECTOR)
21837 {
21838 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
21839 UNSPEC_VPERMR);
21840 }
21841 else
21842 {
21843 /* Invert the selector with a VNAND if available, else a VNOR.
21844 The VNAND is preferred for future fusion opportunities. */
21845 notx = gen_rtx_NOT (V16QImode, sel);
21846 iorx = (TARGET_P8_VECTOR
21847 ? gen_rtx_IOR (V16QImode, notx, notx)
21848 : gen_rtx_AND (V16QImode, notx, notx));
21849 emit_insn (gen_rtx_SET (norreg, iorx));
21850
21851 /* Permute with operands reversed and adjusted selector. */
21852 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
21853 UNSPEC_VPERM);
21854 }
21855
21856 /* Copy into target, possibly by way of a register. */
21857 if (!REG_P (target))
21858 {
21859 emit_move_insn (tmp, unspec);
21860 unspec = tmp;
21861 }
21862
21863 emit_move_insn (target, unspec);
21864 }
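
/* A minimal model (hypothetical, not part of GCC) of why a single vector
   NOT suffices above: vperm only uses the low five bits of each selector
   byte, and (~s) & 31 == 31 - (s & 31), so a VNAND or VNOR of the
   selector with itself performs the 31-minus-element adjustment.  */

static unsigned char
le_adjust_sel_byte_model (unsigned char s)
{
  return (unsigned char) ~s;
}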
21865
21866 /* Expand an Altivec constant permutation. Return true if we match
21867 an efficient implementation; false to fall back to VPERM.
21868
21869 OP0 and OP1 are the input vectors and TARGET is the output vector.
21870 SEL specifies the constant permutation vector. */
21871
21872 static bool
21873 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
21874 const vec_perm_indices &sel)
21875 {
21876 struct altivec_perm_insn {
21877 HOST_WIDE_INT mask;
21878 enum insn_code impl;
21879 unsigned char perm[16];
21880 };
21881 static const struct altivec_perm_insn patterns[] = {
21882 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
21883 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
21884 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
21885 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
21886 { OPTION_MASK_ALTIVEC,
21887 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
21888 : CODE_FOR_altivec_vmrglb_direct),
21889 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
21890 { OPTION_MASK_ALTIVEC,
21891 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
21892 : CODE_FOR_altivec_vmrglh_direct),
21893 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
21894 { OPTION_MASK_ALTIVEC,
21895 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
21896 : CODE_FOR_altivec_vmrglw_direct),
21897 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
21898 { OPTION_MASK_ALTIVEC,
21899 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
21900 : CODE_FOR_altivec_vmrghb_direct),
21901 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
21902 { OPTION_MASK_ALTIVEC,
21903 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
21904 : CODE_FOR_altivec_vmrghh_direct),
21905 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
21906 { OPTION_MASK_ALTIVEC,
21907 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
21908 : CODE_FOR_altivec_vmrghw_direct),
21909 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
21910 { OPTION_MASK_P8_VECTOR,
21911 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
21912 : CODE_FOR_p8_vmrgow_v4sf_direct),
21913 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
21914 { OPTION_MASK_P8_VECTOR,
21915 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
21916 : CODE_FOR_p8_vmrgew_v4sf_direct),
21917 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
21918 };
21919
21920 unsigned int i, j, elt, which;
21921 unsigned char perm[16];
21922 rtx x;
21923 bool one_vec;
21924
21925 /* Unpack the constant selector. */
21926 for (i = which = 0; i < 16; ++i)
21927 {
21928 elt = sel[i] & 31;
21929 which |= (elt < 16 ? 1 : 2);
21930 perm[i] = elt;
21931 }
21932
21933 /* Simplify the constant selector based on operands. */
21934 switch (which)
21935 {
21936 default:
21937 gcc_unreachable ();
21938
21939 case 3:
21940 one_vec = false;
21941 if (!rtx_equal_p (op0, op1))
21942 break;
21943 /* FALLTHRU */
21944
21945 case 2:
21946 for (i = 0; i < 16; ++i)
21947 perm[i] &= 15;
21948 op0 = op1;
21949 one_vec = true;
21950 break;
21951
21952 case 1:
21953 op1 = op0;
21954 one_vec = true;
21955 break;
21956 }
21957
21958 /* Look for splat patterns. */
21959 if (one_vec)
21960 {
21961 elt = perm[0];
21962
21963 for (i = 0; i < 16; ++i)
21964 if (perm[i] != elt)
21965 break;
21966 if (i == 16)
21967 {
21968 if (!BYTES_BIG_ENDIAN)
21969 elt = 15 - elt;
21970 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
21971 return true;
21972 }
21973
21974 if (elt % 2 == 0)
21975 {
21976 for (i = 0; i < 16; i += 2)
21977 if (perm[i] != elt || perm[i + 1] != elt + 1)
21978 break;
21979 if (i == 16)
21980 {
21981 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
21982 x = gen_reg_rtx (V8HImode);
21983 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
21984 GEN_INT (field)));
21985 emit_move_insn (target, gen_lowpart (V16QImode, x));
21986 return true;
21987 }
21988 }
21989
21990 if (elt % 4 == 0)
21991 {
21992 for (i = 0; i < 16; i += 4)
21993 if (perm[i] != elt
21994 || perm[i + 1] != elt + 1
21995 || perm[i + 2] != elt + 2
21996 || perm[i + 3] != elt + 3)
21997 break;
21998 if (i == 16)
21999 {
22000 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22001 x = gen_reg_rtx (V4SImode);
22002 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22003 GEN_INT (field)));
22004 emit_move_insn (target, gen_lowpart (V16QImode, x));
22005 return true;
22006 }
22007 }
22008 }
22009
22010 /* Look for merge and pack patterns. */
22011 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22012 {
22013 bool swapped;
22014
22015 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22016 continue;
22017
22018 elt = patterns[j].perm[0];
22019 if (perm[0] == elt)
22020 swapped = false;
22021 else if (perm[0] == elt + 16)
22022 swapped = true;
22023 else
22024 continue;
22025 for (i = 1; i < 16; ++i)
22026 {
22027 elt = patterns[j].perm[i];
22028 if (swapped)
22029 elt = (elt >= 16 ? elt - 16 : elt + 16);
22030 else if (one_vec && elt >= 16)
22031 elt -= 16;
22032 if (perm[i] != elt)
22033 break;
22034 }
22035 if (i == 16)
22036 {
22037 enum insn_code icode = patterns[j].impl;
22038 machine_mode omode = insn_data[icode].operand[0].mode;
22039 machine_mode imode = insn_data[icode].operand[1].mode;
22040
22041 /* For little-endian, don't use vpkuwum and vpkuhum if the
22042 underlying vector type is not V4SI and V8HI, respectively.
22043 For example, using vpkuwum with a V8HI picks up the even
22044 halfwords (BE numbering) when the even halfwords (LE
22045 numbering) are what we need. */
22046 if (!BYTES_BIG_ENDIAN
22047 && icode == CODE_FOR_altivec_vpkuwum_direct
22048 && ((REG_P (op0)
22049 && GET_MODE (op0) != V4SImode)
22050 || (SUBREG_P (op0)
22051 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22052 continue;
22053 if (!BYTES_BIG_ENDIAN
22054 && icode == CODE_FOR_altivec_vpkuhum_direct
22055 && ((REG_P (op0)
22056 && GET_MODE (op0) != V8HImode)
22057 || (SUBREG_P (op0)
22058 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22059 continue;
22060
22061 /* For little-endian, the two input operands must be swapped
22062 (or swapped back) to ensure proper right-to-left numbering
22063 from 0 to 2N-1. */
22064 if (swapped ^ !BYTES_BIG_ENDIAN)
22065 std::swap (op0, op1);
22066 if (imode != V16QImode)
22067 {
22068 op0 = gen_lowpart (imode, op0);
22069 op1 = gen_lowpart (imode, op1);
22070 }
22071 if (omode == V16QImode)
22072 x = target;
22073 else
22074 x = gen_reg_rtx (omode);
22075 emit_insn (GEN_FCN (icode) (x, op0, op1));
22076 if (omode != V16QImode)
22077 emit_move_insn (target, gen_lowpart (V16QImode, x));
22078 return true;
22079 }
22080 }
22081
22082 if (!BYTES_BIG_ENDIAN)
22083 {
22084 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22085 return true;
22086 }
22087
22088 return false;
22089 }
22090
22091 /* Expand a VSX Permute Doubleword constant permutation.
22092 Return true if we match an efficient implementation. */
22093
22094 static bool
22095 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22096 unsigned char perm0, unsigned char perm1)
22097 {
22098 rtx x;
22099
22100 /* If both selectors come from the same operand, fold to single op. */
22101 if ((perm0 & 2) == (perm1 & 2))
22102 {
22103 if (perm0 & 2)
22104 op0 = op1;
22105 else
22106 op1 = op0;
22107 }
22108 /* If both operands are equal, fold to simpler permutation. */
22109 if (rtx_equal_p (op0, op1))
22110 {
22111 perm0 = perm0 & 1;
22112 perm1 = (perm1 & 1) + 2;
22113 }
22114 /* If the first selector comes from the second operand, swap. */
22115 else if (perm0 & 2)
22116 {
22117 if (perm1 & 2)
22118 return false;
22119 perm0 -= 2;
22120 perm1 += 2;
22121 std::swap (op0, op1);
22122 }
22123 /* If the second selector does not come from the second operand, fail. */
22124 else if ((perm1 & 2) == 0)
22125 return false;
22126
22127 /* Success! */
22128 if (target != NULL)
22129 {
22130 machine_mode vmode, dmode;
22131 rtvec v;
22132
22133 vmode = GET_MODE (target);
22134 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22135 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22136 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22137 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22138 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22139 emit_insn (gen_rtx_SET (target, x));
22140 }
22141 return true;
22142 }
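
/* A minimal model (hypothetical, not part of GCC) of the xxpermdi-style
   select emitted above: PERM0 and PERM1 index into the four-element
   concatenation of OP0 and OP1, after the folding above has arranged for
   PERM0 to select from OP0 and PERM1 from OP1.  */

static void
xxpermdi_model (double dst[2], const double op0[2], const double op1[2],
		unsigned int perm0, unsigned int perm1)
{
  const double cat[4] = { op0[0], op0[1], op1[0], op1[1] };
  dst[0] = cat[perm0 & 3];
  dst[1] = cat[perm1 & 3];
}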
22143
22144 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22145
22146 static bool
22147 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22148 rtx op1, const vec_perm_indices &sel)
22149 {
22150 bool testing_p = !target;
22151
22152 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22153 if (TARGET_ALTIVEC && testing_p)
22154 return true;
22155
22156 /* Check for ps_merge* or xxpermdi insns. */
22157 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22158 {
22159 if (testing_p)
22160 {
22161 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22162 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22163 }
22164 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22165 return true;
22166 }
22167
22168 if (TARGET_ALTIVEC)
22169 {
22170 /* Force the target-independent code to lower to V16QImode. */
22171 if (vmode != V16QImode)
22172 return false;
22173 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22174 return true;
22175 }
22176
22177 return false;
22178 }
22179
22180 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22181 OP0 and OP1 are the input vectors and TARGET is the output vector.
22182 PERM specifies the constant permutation vector. */
22183
22184 static void
22185 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22186 machine_mode vmode, const vec_perm_builder &perm)
22187 {
22188 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22189 if (x != target)
22190 emit_move_insn (target, x);
22191 }
22192
22193 /* Expand an extract even operation. */
22194
22195 void
22196 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22197 {
22198 machine_mode vmode = GET_MODE (target);
22199 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22200 vec_perm_builder perm (nelt, nelt, 1);
22201
22202 for (i = 0; i < nelt; i++)
22203 perm.quick_push (i * 2);
22204
22205 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22206 }
22207
22208 /* Expand a vector interleave operation. */
22209
22210 void
22211 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22212 {
22213 machine_mode vmode = GET_MODE (target);
22214 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22215 vec_perm_builder perm (nelt, nelt, 1);
22216
22217 high = (highp ? 0 : nelt / 2);
22218 for (i = 0; i < nelt / 2; i++)
22219 {
22220 perm.quick_push (i + high);
22221 perm.quick_push (i + nelt + high);
22222 }
22223
22224 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22225 }
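
/* A minimal model (hypothetical, not part of GCC) of the selector built
   above: for NELT == 4 with HIGHP it is {0, 4, 1, 5}, alternating the
   first half of OP0 with the first half of OP1; without HIGHP the second
   halves are interleaved instead.  */

static void
interleave_sel_model (unsigned int *sel, unsigned int nelt, int highp)
{
  unsigned int base = highp ? 0 : nelt / 2;
  for (unsigned int i = 0, j = 0; i < nelt / 2; i++)
    {
      sel[j++] = base + i;		/* Element of op0.  */
      sel[j++] = base + i + nelt;	/* Element of op1.  */
    }
}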
22226
22227 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
22228 void
22229 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22230 {
22231 HOST_WIDE_INT hwi_scale (scale);
22232 REAL_VALUE_TYPE r_pow;
22233 rtvec v = rtvec_alloc (2);
22234 rtx elt;
22235 rtx scale_vec = gen_reg_rtx (V2DFmode);
22236 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22237 elt = const_double_from_real_value (r_pow, DFmode);
22238 RTVEC_ELT (v, 0) = elt;
22239 RTVEC_ELT (v, 1) = elt;
22240 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22241 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
22242 }
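
/* A minimal model (hypothetical, not part of GCC) of the scaling above:
   a multiply by a splatted constant equal to 2**SCALE.  For simplicity
   this sketch assumes a non-negative SCALE.  */

static void
scale_v2df_model (double v[2], int scale)
{
  double factor = 1.0;
  while (scale-- > 0)
    factor *= 2.0;
  v[0] *= factor;
  v[1] *= factor;
}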
22243
22244 /* Return an RTX representing where to find the function value of a
22245 function returning MODE. */
22246 static rtx
22247 rs6000_complex_function_value (machine_mode mode)
22248 {
22249 unsigned int regno;
22250 rtx r1, r2;
22251 machine_mode inner = GET_MODE_INNER (mode);
22252 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22253
22254 if (TARGET_FLOAT128_TYPE
22255 && (mode == KCmode
22256 || (mode == TCmode && TARGET_IEEEQUAD)))
22257 regno = ALTIVEC_ARG_RETURN;
22258
22259 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22260 regno = FP_ARG_RETURN;
22261
22262 else
22263 {
22264 regno = GP_ARG_RETURN;
22265
22266 /* 32-bit is OK since it'll go in r3/r4. */
22267 if (TARGET_32BIT && inner_bytes >= 4)
22268 return gen_rtx_REG (mode, regno);
22269 }
22270
22271 if (inner_bytes >= 8)
22272 return gen_rtx_REG (mode, regno);
22273
22274 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22275 const0_rtx);
22276 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22277 GEN_INT (inner_bytes));
22278 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22279 }
22280
22281 /* Return an rtx describing a return value of MODE as a PARALLEL
22282 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22283 stride REG_STRIDE. */
22284
22285 static rtx
22286 rs6000_parallel_return (machine_mode mode,
22287 int n_elts, machine_mode elt_mode,
22288 unsigned int regno, unsigned int reg_stride)
22289 {
22290 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22291
22292 int i;
22293 for (i = 0; i < n_elts; i++)
22294 {
22295 rtx r = gen_rtx_REG (elt_mode, regno);
22296 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22297 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22298 regno += reg_stride;
22299 }
22300
22301 return par;
22302 }
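
/* A minimal model (hypothetical, not part of GCC) of the layout built
   above: element I of the return value is assigned byte offset
   I * ELT_SIZE and register REGNO + I * REG_STRIDE.  */

static void
parallel_return_layout_model (int n_elts, int elt_size, int regno,
			      int reg_stride, int *regs, int *offsets)
{
  for (int i = 0; i < n_elts; i++)
    {
      regs[i] = regno + i * reg_stride;
      offsets[i] = i * elt_size;
    }
}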
22303
22304 /* Target hook for TARGET_FUNCTION_VALUE.
22305
22306 An integer value is in r3 and a floating-point value is in fp1,
22307 unless -msoft-float. */
22308
22309 static rtx
22310 rs6000_function_value (const_tree valtype,
22311 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
22312 bool outgoing ATTRIBUTE_UNUSED)
22313 {
22314 machine_mode mode;
22315 unsigned int regno;
22316 machine_mode elt_mode;
22317 int n_elts;
22318
22319 /* Special handling for structs in darwin64. */
22320 if (TARGET_MACHO
22321 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
22322 {
22323 CUMULATIVE_ARGS valcum;
22324 rtx valret;
22325
22326 valcum.words = 0;
22327 valcum.fregno = FP_ARG_MIN_REG;
22328 valcum.vregno = ALTIVEC_ARG_MIN_REG;
22329 /* Do a trial code generation as if this were going to be passed as
22330 an argument; if any part goes in memory, we return NULL. */
22331 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
22332 if (valret)
22333 return valret;
22334 /* Otherwise fall through to standard ABI rules. */
22335 }
22336
22337 mode = TYPE_MODE (valtype);
22338
22339 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
22340 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
22341 {
22342 int first_reg, n_regs;
22343
22344 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
22345 {
22346 /* _Decimal128 must use even/odd register pairs. */
22347 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22348 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
22349 }
22350 else
22351 {
22352 first_reg = ALTIVEC_ARG_RETURN;
22353 n_regs = 1;
22354 }
22355
22356 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
22357 }
22358
22359 /* Some return value types need to be split for -mpowerpc64 under the 32-bit ABI. */
22360 if (TARGET_32BIT && TARGET_POWERPC64)
22361 switch (mode)
22362 {
22363 default:
22364 break;
22365 case E_DImode:
22366 case E_SCmode:
22367 case E_DCmode:
22368 case E_TCmode:
22369 int count = GET_MODE_SIZE (mode) / 4;
22370 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
22371 }
22372
22373 if ((INTEGRAL_TYPE_P (valtype)
22374 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
22375 || POINTER_TYPE_P (valtype))
22376 mode = TARGET_32BIT ? SImode : DImode;
22377
22378 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22379 /* _Decimal128 must use an even/odd register pair. */
22380 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22381 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
22382 && !FLOAT128_VECTOR_P (mode))
22383 regno = FP_ARG_RETURN;
22384 else if (TREE_CODE (valtype) == COMPLEX_TYPE
22385 && targetm.calls.split_complex_arg)
22386 return rs6000_complex_function_value (mode);
22387 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22388 return register is used in both cases, and we won't see V2DImode/V2DFmode
22389 for pure altivec, combine the two cases. */
22390 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
22391 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
22392 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22393 regno = ALTIVEC_ARG_RETURN;
22394 else
22395 regno = GP_ARG_RETURN;
22396
22397 return gen_rtx_REG (mode, regno);
22398 }
22399
22400 /* Define how to find the value returned by a library function
22401 assuming the value has mode MODE. */
22402 rtx
22403 rs6000_libcall_value (machine_mode mode)
22404 {
22405 unsigned int regno;
22406
22407 /* A long long return value needs to be split for -mpowerpc64 under the 32-bit ABI. */
22408 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
22409 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
22410
22411 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22412 /* _Decimal128 must use an even/odd register pair. */
22413 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22414 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
22415 regno = FP_ARG_RETURN;
22416 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22417 return register is used in both cases, and we won't see V2DImode/V2DFmode
22418 for pure altivec, combine the two cases. */
22419 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22420 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22421 regno = ALTIVEC_ARG_RETURN;
22422 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22423 return rs6000_complex_function_value (mode);
22424 else
22425 regno = GP_ARG_RETURN;
22426
22427 return gen_rtx_REG (mode, regno);
22428 }
22429
22430 /* Compute register pressure classes. We implement the target hook to avoid
22431 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22432 lead to incorrect estimates of the number of available registers and
22433 therefore to increased register pressure/spills. */
22434 static int
22435 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22436 {
22437 int n;
22438
22439 n = 0;
22440 pressure_classes[n++] = GENERAL_REGS;
22441 if (TARGET_VSX)
22442 pressure_classes[n++] = VSX_REGS;
22443 else
22444 {
22445 if (TARGET_ALTIVEC)
22446 pressure_classes[n++] = ALTIVEC_REGS;
22447 if (TARGET_HARD_FLOAT)
22448 pressure_classes[n++] = FLOAT_REGS;
22449 }
22450 pressure_classes[n++] = CR_REGS;
22451 pressure_classes[n++] = SPECIAL_REGS;
22452
22453 return n;
22454 }
22455
22456 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22457 Frame pointer elimination is automatically handled.
22458
22459 For the RS/6000, if frame pointer elimination is being done, we would like
22460 to convert ap into fp, not sp.
22461
22462 We need r30 if -mminimal-toc was specified, and there are constant pool
22463 references. */
22464
22465 static bool
22466 rs6000_can_eliminate (const int from, const int to)
22467 {
22468 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22469 ? ! frame_pointer_needed
22470 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22471 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22472 || constant_pool_empty_p ()
22473 : true);
22474 }
22475
22476 /* Define the offset between two registers, FROM to be eliminated and its
22477 replacement TO, at the start of a routine. */
22478 HOST_WIDE_INT
22479 rs6000_initial_elimination_offset (int from, int to)
22480 {
22481 rs6000_stack_t *info = rs6000_stack_info ();
22482 HOST_WIDE_INT offset;
22483
22484 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22485 offset = info->push_p ? 0 : -info->total_size;
22486 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22487 {
22488 offset = info->push_p ? 0 : -info->total_size;
22489 if (FRAME_GROWS_DOWNWARD)
22490 offset += info->fixed_size + info->vars_size + info->parm_size;
22491 }
22492 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22493 offset = FRAME_GROWS_DOWNWARD
22494 ? info->fixed_size + info->vars_size + info->parm_size
22495 : 0;
22496 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22497 offset = info->total_size;
22498 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22499 offset = info->push_p ? info->total_size : 0;
22500 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22501 offset = 0;
22502 else
22503 gcc_unreachable ();
22504
22505 return offset;
22506 }
22507
22508 /* Fill in sizes of registers used by unwinder. */
22509
22510 static void
22511 rs6000_init_dwarf_reg_sizes_extra (tree address)
22512 {
22513 if (TARGET_MACHO && ! TARGET_ALTIVEC)
22514 {
22515 int i;
22516 machine_mode mode = TYPE_MODE (char_type_node);
22517 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
22518 rtx mem = gen_rtx_MEM (BLKmode, addr);
22519 rtx value = gen_int_mode (16, mode);
22520
22521 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
22522 The unwinder still needs to know the size of Altivec registers. */
22523
22524 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
22525 {
22526 int column = DWARF_REG_TO_UNWIND_COLUMN
22527 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
22528 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
22529
22530 emit_move_insn (adjust_address (mem, mode, offset), value);
22531 }
22532 }
22533 }
22534
22535 /* Map internal gcc register numbers to debug format register numbers.
22536 FORMAT specifies the type of debug register number to use:
22537 0 -- debug information, except for frame-related sections
22538 1 -- DWARF .debug_frame section
22539 2 -- DWARF .eh_frame section */
22540
22541 unsigned int
22542 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
22543 {
22544 /* On some platforms, we use the standard DWARF register
22545 numbering for .debug_info and .debug_frame. */
22546 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
22547 {
22548 #ifdef RS6000_USE_DWARF_NUMBERING
22549 if (regno <= 31)
22550 return regno;
22551 if (FP_REGNO_P (regno))
22552 return regno - FIRST_FPR_REGNO + 32;
22553 if (ALTIVEC_REGNO_P (regno))
22554 return regno - FIRST_ALTIVEC_REGNO + 1124;
22555 if (regno == LR_REGNO)
22556 return 108;
22557 if (regno == CTR_REGNO)
22558 return 109;
22559 if (regno == CA_REGNO)
22560 return 101; /* XER */
22561 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
22562 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
22563 The actual code emitted saves the whole of CR, so we map CR2_REGNO
22564 to the DWARF reg for CR. */
22565 if (format == 1 && regno == CR2_REGNO)
22566 return 64;
22567 if (CR_REGNO_P (regno))
22568 return regno - CR0_REGNO + 86;
22569 if (regno == VRSAVE_REGNO)
22570 return 356;
22571 if (regno == VSCR_REGNO)
22572 return 67;
22573
22574 /* These do not make much sense. */
22575 if (regno == FRAME_POINTER_REGNUM)
22576 return 111;
22577 if (regno == ARG_POINTER_REGNUM)
22578 return 67;
22579 if (regno == 64)
22580 return 100;
22581
22582 gcc_unreachable ();
22583 #endif
22584 }
22585
22586 /* We use the GCC 7 (and before) internal register numbers for non-DWARF
22587 debug information, and also for .eh_frame. */
22589 if (regno <= 31)
22590 return regno;
22591 if (FP_REGNO_P (regno))
22592 return regno - FIRST_FPR_REGNO + 32;
22593 if (ALTIVEC_REGNO_P (regno))
22594 return regno - FIRST_ALTIVEC_REGNO + 77;
22595 if (regno == LR_REGNO)
22596 return 65;
22597 if (regno == CTR_REGNO)
22598 return 66;
22599 if (regno == CA_REGNO)
22600 return 76; /* XER */
22601 if (CR_REGNO_P (regno))
22602 return regno - CR0_REGNO + 68;
22603 if (regno == VRSAVE_REGNO)
22604 return 109;
22605 if (regno == VSCR_REGNO)
22606 return 110;
22607
22608 if (regno == FRAME_POINTER_REGNUM)
22609 return 111;
22610 if (regno == ARG_POINTER_REGNUM)
22611 return 67;
22612 if (regno == 64)
22613 return 64;
22614
22615 gcc_unreachable ();
22616 }
22617
22618 /* Target hook for eh_return_filter_mode. */
22619 static scalar_int_mode
22620 rs6000_eh_return_filter_mode (void)
22621 {
22622 return TARGET_32BIT ? SImode : word_mode;
22623 }
22624
22625 /* Target hook for translate_mode_attribute. */
22626 static machine_mode
22627 rs6000_translate_mode_attribute (machine_mode mode)
22628 {
22629 if ((FLOAT128_IEEE_P (mode)
22630 && ieee128_float_type_node == long_double_type_node)
22631 || (FLOAT128_IBM_P (mode)
22632 && ibm128_float_type_node == long_double_type_node))
22633 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22634 return mode;
22635 }
22636
22637 /* Target hook for scalar_mode_supported_p. */
22638 static bool
22639 rs6000_scalar_mode_supported_p (scalar_mode mode)
22640 {
22641 /* -m32 does not support TImode. This is the default, from
22642 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22643 same ABI as for -m32. But default_scalar_mode_supported_p allows
22644 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22645 for -mpowerpc64. */
22646 if (TARGET_32BIT && mode == TImode)
22647 return false;
22648
22649 if (DECIMAL_FLOAT_MODE_P (mode))
22650 return default_decimal_float_supported_p ();
22651 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22652 return true;
22653 else
22654 return default_scalar_mode_supported_p (mode);
22655 }
22656
22657 /* Target hook for vector_mode_supported_p. */
22658 static bool
22659 rs6000_vector_mode_supported_p (machine_mode mode)
22660 {
22661 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22662 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22663 double-double. */
22664 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22665 return true;
22666
22667 else
22668 return false;
22669 }
22670
22671 /* Target hook for floatn_mode. */
22672 static opt_scalar_float_mode
22673 rs6000_floatn_mode (int n, bool extended)
22674 {
22675 if (extended)
22676 {
22677 switch (n)
22678 {
22679 case 32:
22680 return DFmode;
22681
22682 case 64:
22683 if (TARGET_FLOAT128_TYPE)
22684 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22685 else
22686 return opt_scalar_float_mode ();
22687
22688 case 128:
22689 return opt_scalar_float_mode ();
22690
22691 default:
22692 /* Those are the only valid _FloatNx types. */
22693 gcc_unreachable ();
22694 }
22695 }
22696 else
22697 {
22698 switch (n)
22699 {
22700 case 32:
22701 return SFmode;
22702
22703 case 64:
22704 return DFmode;
22705
22706 case 128:
22707 if (TARGET_FLOAT128_TYPE)
22708 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22709 else
22710 return opt_scalar_float_mode ();
22711
22712 default:
22713 return opt_scalar_float_mode ();
22714 }
22715 }
22716
22717 }
22718
22719 /* Target hook for c_mode_for_suffix. */
22720 static machine_mode
22721 rs6000_c_mode_for_suffix (char suffix)
22722 {
22723 if (TARGET_FLOAT128_TYPE)
22724 {
22725 if (suffix == 'q' || suffix == 'Q')
22726 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22727
22728 /* At the moment, we are not defining a suffix for IBM extended double.
22729 If/when the default for -mabi=ieeelongdouble is changed, and we want
22730 to support __ibm128 constants in legacy library code, we may need to
22731 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
22732 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22733 __float80 constants. */
22734 }
22735
22736 return VOIDmode;
22737 }
22738
22739 /* Target hook for invalid_arg_for_unprototyped_fn. */
22740 static const char *
22741 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22742 {
22743 return (!rs6000_darwin64_abi
22744 && typelist == 0
22745 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22746 && (funcdecl == NULL_TREE
22747 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22748 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22749 ? N_("AltiVec argument passed to unprototyped function")
22750 : NULL;
22751 }
22752
22753 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
22754 setup by using the hidden function __stack_chk_fail_local instead of
22755 calling __stack_chk_fail directly. Otherwise it is better to call
22756 __stack_chk_fail directly. */
22757
22758 static tree ATTRIBUTE_UNUSED
22759 rs6000_stack_protect_fail (void)
22760 {
22761 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22762 ? default_hidden_stack_protect_fail ()
22763 : default_external_stack_protect_fail ();
22764 }
22765
22766 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
22767
22768 #if TARGET_ELF
22769 static unsigned HOST_WIDE_INT
22770 rs6000_asan_shadow_offset (void)
22771 {
22772 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
22773 }
22774 #endif
22775 \f
22776 /* Mask options that we want to support inside of attribute((target)) and
22777 #pragma GCC target operations. Note, we do not include things like
22778 64/32-bit, endianness, hard/soft floating point, etc. that would have
22779 different calling sequences. */
22780
22781 struct rs6000_opt_mask {
22782 const char *name; /* option name */
22783 HOST_WIDE_INT mask; /* mask to set */
22784 bool invert; /* invert sense of mask */
22785 bool valid_target; /* option is a target option */
22786 };
22787
22788 static struct rs6000_opt_mask const rs6000_opt_masks[] =
22789 {
22790 { "altivec", OPTION_MASK_ALTIVEC, false, true },
22791 { "cmpb", OPTION_MASK_CMPB, false, true },
22792 { "crypto", OPTION_MASK_CRYPTO, false, true },
22793 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
22794 { "dlmzb", OPTION_MASK_DLMZB, false, true },
22795 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
22796 false, true },
22797 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
22798 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
22799 { "fprnd", OPTION_MASK_FPRND, false, true },
22800 { "future", OPTION_MASK_FUTURE, false, true },
22801 { "hard-dfp", OPTION_MASK_DFP, false, true },
22802 { "htm", OPTION_MASK_HTM, false, true },
22803 { "isel", OPTION_MASK_ISEL, false, true },
22804 { "mfcrf", OPTION_MASK_MFCRF, false, true },
22805 { "mfpgpr", 0, false, true },
22806 { "modulo", OPTION_MASK_MODULO, false, true },
22807 { "mulhw", OPTION_MASK_MULHW, false, true },
22808 { "multiple", OPTION_MASK_MULTIPLE, false, true },
22809 { "pcrel", OPTION_MASK_PCREL, false, true },
22810 { "popcntb", OPTION_MASK_POPCNTB, false, true },
22811 { "popcntd", OPTION_MASK_POPCNTD, false, true },
22812 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
22813 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
22814 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
22815 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
22816 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
22817 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
22818 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
22819 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
22820 { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
22821 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
22822 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
22823 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
22824 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
22825 { "string", 0, false, true },
22826 { "update", OPTION_MASK_NO_UPDATE, true , true },
22827 { "vsx", OPTION_MASK_VSX, false, true },
22828 #ifdef OPTION_MASK_64BIT
22829 #if TARGET_AIX_OS
22830 { "aix64", OPTION_MASK_64BIT, false, false },
22831 { "aix32", OPTION_MASK_64BIT, true, false },
22832 #else
22833 { "64", OPTION_MASK_64BIT, false, false },
22834 { "32", OPTION_MASK_64BIT, true, false },
22835 #endif
22836 #endif
22837 #ifdef OPTION_MASK_EABI
22838 { "eabi", OPTION_MASK_EABI, false, false },
22839 #endif
22840 #ifdef OPTION_MASK_LITTLE_ENDIAN
22841 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
22842 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
22843 #endif
22844 #ifdef OPTION_MASK_RELOCATABLE
22845 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
22846 #endif
22847 #ifdef OPTION_MASK_STRICT_ALIGN
22848 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
22849 #endif
22850 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
22851 { "string", 0, false, false },
22852 };
22853
22854 /* Builtin mask mapping for printing the flags. */
22855 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
22856 {
22857 { "altivec", RS6000_BTM_ALTIVEC, false, false },
22858 { "vsx", RS6000_BTM_VSX, false, false },
22859 { "fre", RS6000_BTM_FRE, false, false },
22860 { "fres", RS6000_BTM_FRES, false, false },
22861 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
22862 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
22863 { "popcntd", RS6000_BTM_POPCNTD, false, false },
22864 { "cell", RS6000_BTM_CELL, false, false },
22865 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
22866 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
22867 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
22868 { "crypto", RS6000_BTM_CRYPTO, false, false },
22869 { "htm", RS6000_BTM_HTM, false, false },
22870 { "hard-dfp", RS6000_BTM_DFP, false, false },
22871 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
22872 { "long-double-128", RS6000_BTM_LDBL128, false, false },
22873 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
22874 { "float128", RS6000_BTM_FLOAT128, false, false },
22875 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
22876 };
22877
22878 /* Option variables that we want to support inside attribute((target)) and
22879 #pragma GCC target operations. */
22880
22881 struct rs6000_opt_var {
22882 const char *name; /* option name */
22883 size_t global_offset; /* offset of the option in global_options. */
22884 size_t target_offset; /* offset of the option in target options. */
22885 };
22886
22887 static struct rs6000_opt_var const rs6000_opt_vars[] =
22888 {
22889 { "friz",
22890 offsetof (struct gcc_options, x_TARGET_FRIZ),
22891 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
22892 { "avoid-indexed-addresses",
22893 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
22894 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
22895 { "longcall",
22896 offsetof (struct gcc_options, x_rs6000_default_long_calls),
22897 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
22898 { "optimize-swaps",
22899 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
22900 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
22901 { "allow-movmisalign",
22902 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
22903 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
22904 { "sched-groups",
22905 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
22906 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
22907 { "always-hint",
22908 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
22909 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
22910 { "align-branch-targets",
22911 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
22912 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
22913 { "sched-prolog",
22914 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22915 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22916 { "sched-epilog",
22917 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22918 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22919 { "speculate-indirect-jumps",
22920 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
22921 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
22922 };
22923
22924 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
22925 parsing. Return true if there were no errors. */
22926
22927 static bool
22928 rs6000_inner_target_options (tree args, bool attr_p)
22929 {
22930 bool ret = true;
22931
22932 if (args == NULL_TREE)
22933 ;
22934
22935 else if (TREE_CODE (args) == STRING_CST)
22936 {
22937 char *p = ASTRDUP (TREE_STRING_POINTER (args));
22938 char *q;
22939
22940 while ((q = strtok (p, ",")) != NULL)
22941 {
22942 bool error_p = false;
22943 bool not_valid_p = false;
22944 const char *cpu_opt = NULL;
22945
22946 p = NULL;
22947 if (strncmp (q, "cpu=", 4) == 0)
22948 {
22949 int cpu_index = rs6000_cpu_name_lookup (q+4);
22950 if (cpu_index >= 0)
22951 rs6000_cpu_index = cpu_index;
22952 else
22953 {
22954 error_p = true;
22955 cpu_opt = q+4;
22956 }
22957 }
22958 else if (strncmp (q, "tune=", 5) == 0)
22959 {
22960 int tune_index = rs6000_cpu_name_lookup (q+5);
22961 if (tune_index >= 0)
22962 rs6000_tune_index = tune_index;
22963 else
22964 {
22965 error_p = true;
22966 cpu_opt = q+5;
22967 }
22968 }
22969 else
22970 {
22971 size_t i;
22972 bool invert = false;
22973 char *r = q;
22974
22975 error_p = true;
22976 if (strncmp (r, "no-", 3) == 0)
22977 {
22978 invert = true;
22979 r += 3;
22980 }
22981
22982 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
22983 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
22984 {
22985 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
22986
22987 if (!rs6000_opt_masks[i].valid_target)
22988 not_valid_p = true;
22989 else
22990 {
22991 error_p = false;
22992 rs6000_isa_flags_explicit |= mask;
22993
22994 /* VSX needs altivec, so -mvsx automagically sets
22995 altivec and disables -mavoid-indexed-addresses. */
22996 if (!invert)
22997 {
22998 if (mask == OPTION_MASK_VSX)
22999 {
23000 mask |= OPTION_MASK_ALTIVEC;
23001 TARGET_AVOID_XFORM = 0;
23002 }
23003 }
23004
23005 if (rs6000_opt_masks[i].invert)
23006 invert = !invert;
23007
23008 if (invert)
23009 rs6000_isa_flags &= ~mask;
23010 else
23011 rs6000_isa_flags |= mask;
23012 }
23013 break;
23014 }
23015
23016 if (error_p && !not_valid_p)
23017 {
23018 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23019 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23020 {
23021 size_t j = rs6000_opt_vars[i].global_offset;
23022 *((int *) ((char *)&global_options + j)) = !invert;
23023 error_p = false;
23024 not_valid_p = false;
23025 break;
23026 }
23027 }
23028 }
23029
23030 if (error_p)
23031 {
23032 const char *eprefix, *esuffix;
23033
23034 ret = false;
23035 if (attr_p)
23036 {
23037 eprefix = "__attribute__((__target__(";
23038 esuffix = ")))";
23039 }
23040 else
23041 {
23042 eprefix = "#pragma GCC target ";
23043 esuffix = "";
23044 }
23045
23046 if (cpu_opt)
23047 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23048 q, esuffix);
23049 else if (not_valid_p)
23050 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23051 else
23052 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23053 }
23054 }
23055 }
23056
23057 else if (TREE_CODE (args) == TREE_LIST)
23058 {
23059 do
23060 {
23061 tree value = TREE_VALUE (args);
23062 if (value)
23063 {
23064 bool ret2 = rs6000_inner_target_options (value, attr_p);
23065 if (!ret2)
23066 ret = false;
23067 }
23068 args = TREE_CHAIN (args);
23069 }
23070 while (args != NULL_TREE);
23071 }
23072
23073 else
23074 {
23075 error ("attribute %<target%> argument not a string");
23076 return false;
23077 }
23078
23079 return ret;
23080 }
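
/* A minimal sketch (hypothetical, not part of GCC) of the token handling
   above: each comma-separated option may carry a "no-" prefix, which is
   folded into the invert flag before the mask and variable tables are
   searched.  */

static const char *
strip_no_prefix_model (const char *q, bool *invert)
{
  *invert = (strncmp (q, "no-", 3) == 0);
  return *invert ? q + 3 : q;
}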
23081
23082 /* Print out the target options as a list for -mdebug=target. */
23083
23084 static void
23085 rs6000_debug_target_options (tree args, const char *prefix)
23086 {
23087 if (args == NULL_TREE)
23088 fprintf (stderr, "%s<NULL>", prefix);
23089
23090 else if (TREE_CODE (args) == STRING_CST)
23091 {
23092 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23093 char *q;
23094
23095 while ((q = strtok (p, ",")) != NULL)
23096 {
23097 p = NULL;
23098 fprintf (stderr, "%s\"%s\"", prefix, q);
23099 prefix = ", ";
23100 }
23101 }
23102
23103 else if (TREE_CODE (args) == TREE_LIST)
23104 {
23105 do
23106 {
23107 tree value = TREE_VALUE (args);
23108 if (value)
23109 {
23110 rs6000_debug_target_options (value, prefix);
23111 prefix = ", ";
23112 }
23113 args = TREE_CHAIN (args);
23114 }
23115 while (args != NULL_TREE);
23116 }
23117
23118 else
23119 gcc_unreachable ();
23120
23121 return;
23122 }
23123
23124 \f
23125 /* Hook to validate attribute((target("..."))). */
23126
23127 static bool
23128 rs6000_valid_attribute_p (tree fndecl,
23129 tree ARG_UNUSED (name),
23130 tree args,
23131 int flags)
23132 {
23133 struct cl_target_option cur_target;
23134 bool ret;
23135 tree old_optimize;
23136 tree new_target, new_optimize;
23137 tree func_optimize;
23138
23139 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23140
23141 if (TARGET_DEBUG_TARGET)
23142 {
23143 tree tname = DECL_NAME (fndecl);
23144 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23145 if (tname)
23146 fprintf (stderr, "function: %.*s\n",
23147 (int) IDENTIFIER_LENGTH (tname),
23148 IDENTIFIER_POINTER (tname));
23149 else
23150 fprintf (stderr, "function: unknown\n");
23151
23152 fprintf (stderr, "args:");
23153 rs6000_debug_target_options (args, " ");
23154 fprintf (stderr, "\n");
23155
23156 if (flags)
23157 fprintf (stderr, "flags: 0x%x\n", flags);
23158
23159 fprintf (stderr, "--------------------\n");
23160 }
23161
23162 /* attribute((target("default"))) does nothing, beyond
23163 affecting multi-versioning. */
23164 if (TREE_VALUE (args)
23165 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23166 && TREE_CHAIN (args) == NULL_TREE
23167 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23168 return true;
23169
23170 old_optimize = build_optimization_node (&global_options);
23171 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23172
23173 /* If the function changed the optimization levels as well as setting target
23174 options, start with the optimizations specified. */
23175 if (func_optimize && func_optimize != old_optimize)
23176 cl_optimization_restore (&global_options,
23177 TREE_OPTIMIZATION (func_optimize));
23178
23179 /* The target attributes may also change some optimization flags, so update
23180 the optimization options if necessary. */
23181 cl_target_option_save (&cur_target, &global_options);
23182 rs6000_cpu_index = rs6000_tune_index = -1;
23183 ret = rs6000_inner_target_options (args, true);
23184
23185 /* Set up any additional state. */
23186 if (ret)
23187 {
23188 ret = rs6000_option_override_internal (false);
23189 new_target = build_target_option_node (&global_options);
23190 }
23191 else
23192 new_target = NULL;
23193
23194 new_optimize = build_optimization_node (&global_options);
23195
23196 if (!new_target)
23197 ret = false;
23198
23199 else if (fndecl)
23200 {
23201 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
23202
23203 if (old_optimize != new_optimize)
23204 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
23205 }
23206
23207 cl_target_option_restore (&global_options, &cur_target);
23208
23209 if (old_optimize != new_optimize)
23210 cl_optimization_restore (&global_options,
23211 TREE_OPTIMIZATION (old_optimize));
23212
23213 return ret;
23214 }
23215
23216 \f
23217 /* Hook to validate the current #pragma GCC target and set the state, and
23218 update the macros based on what was changed. If ARGS is NULL, then
23219 POP_TARGET is used to reset the options. */
23220
23221 bool
23222 rs6000_pragma_target_parse (tree args, tree pop_target)
23223 {
23224 tree prev_tree = build_target_option_node (&global_options);
23225 tree cur_tree;
23226 struct cl_target_option *prev_opt, *cur_opt;
23227 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
23228 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
23229
23230 if (TARGET_DEBUG_TARGET)
23231 {
23232 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
23233 fprintf (stderr, "args:");
23234 rs6000_debug_target_options (args, " ");
23235 fprintf (stderr, "\n");
23236
23237 if (pop_target)
23238 {
23239 fprintf (stderr, "pop_target:\n");
23240 debug_tree (pop_target);
23241 }
23242 else
23243 fprintf (stderr, "pop_target: <NULL>\n");
23244
23245 fprintf (stderr, "--------------------\n");
23246 }
23247
23248 if (! args)
23249 {
23250 cur_tree = ((pop_target)
23251 ? pop_target
23252 : target_option_default_node);
23253 cl_target_option_restore (&global_options,
23254 TREE_TARGET_OPTION (cur_tree));
23255 }
23256 else
23257 {
23258 rs6000_cpu_index = rs6000_tune_index = -1;
23259 if (!rs6000_inner_target_options (args, false)
23260 || !rs6000_option_override_internal (false)
23261 || (cur_tree = build_target_option_node (&global_options))
23262 == NULL_TREE)
23263 {
23264 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
23265 fprintf (stderr, "invalid pragma\n");
23266
23267 return false;
23268 }
23269 }
23270
23271 target_option_current_node = cur_tree;
23272 rs6000_activate_target_options (target_option_current_node);
23273
23274 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23275 change the macros that are defined. */
23276 if (rs6000_target_modify_macros_ptr)
23277 {
23278 prev_opt = TREE_TARGET_OPTION (prev_tree);
23279 prev_bumask = prev_opt->x_rs6000_builtin_mask;
23280 prev_flags = prev_opt->x_rs6000_isa_flags;
23281
23282 cur_opt = TREE_TARGET_OPTION (cur_tree);
23283 cur_flags = cur_opt->x_rs6000_isa_flags;
23284 cur_bumask = cur_opt->x_rs6000_builtin_mask;
23285
23286 diff_bumask = (prev_bumask ^ cur_bumask);
23287 diff_flags = (prev_flags ^ cur_flags);
23288
23289 if ((diff_flags != 0) || (diff_bumask != 0))
23290 {
23291 /* Delete old macros. */
23292 rs6000_target_modify_macros_ptr (false,
23293 prev_flags & diff_flags,
23294 prev_bumask & diff_bumask);
23295
23296 /* Define new macros. */
23297 rs6000_target_modify_macros_ptr (true,
23298 cur_flags & diff_flags,
23299 cur_bumask & diff_bumask);
23300 }
23301 }
23302
23303 return true;
23304 }
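
/* A minimal model (hypothetical, not part of GCC) of the macro diff
   computed above: XOR isolates the flags that changed, and ANDing the
   difference with the previous (resp. current) flags yields the macros
   to delete (resp. define).  */

static void
macro_diff_model (HOST_WIDE_INT prev, HOST_WIDE_INT cur,
		  HOST_WIDE_INT *to_delete, HOST_WIDE_INT *to_define)
{
  HOST_WIDE_INT diff = prev ^ cur;
  *to_delete = prev & diff;	/* Set before, clear now.  */
  *to_define = cur & diff;	/* Clear before, set now.  */
}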
23305
23306 \f
23307 /* Remember the last target of rs6000_set_current_function. */
23308 static GTY(()) tree rs6000_previous_fndecl;
23309
23310 /* Restore target's globals from NEW_TREE and invalidate the
23311 rs6000_previous_fndecl cache. */
23312
23313 void
23314 rs6000_activate_target_options (tree new_tree)
23315 {
23316 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
23317 if (TREE_TARGET_GLOBALS (new_tree))
23318 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
23319 else if (new_tree == target_option_default_node)
23320 restore_target_globals (&default_target_globals);
23321 else
23322 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
23323 rs6000_previous_fndecl = NULL_TREE;
23324 }
23325
23326 /* Establish appropriate back-end context for processing the function
23327 FNDECL. The argument might be NULL to indicate processing at top
23328 level, outside of any function scope. */
23329 static void
23330 rs6000_set_current_function (tree fndecl)
23331 {
23332 if (TARGET_DEBUG_TARGET)
23333 {
23334 fprintf (stderr, "\n==================== rs6000_set_current_function");
23335
23336 if (fndecl)
23337 fprintf (stderr, ", fndecl %s (%p)",
23338 (DECL_NAME (fndecl)
23339 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
23340 : "<unknown>"), (void *)fndecl);
23341
23342 if (rs6000_previous_fndecl)
23343 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
23344
23345 fprintf (stderr, "\n");
23346 }
23347
23348 /* Only change the context if the function changes. This hook is called
23349 several times in the course of compiling a function, and we don't want to
23350 slow things down too much or call target_reinit when it isn't safe. */
23351 if (fndecl == rs6000_previous_fndecl)
23352 return;
23353
23354 tree old_tree;
23355 if (rs6000_previous_fndecl == NULL_TREE)
23356 old_tree = target_option_current_node;
23357 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
23358 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
23359 else
23360 old_tree = target_option_default_node;
23361
23362 tree new_tree;
23363 if (fndecl == NULL_TREE)
23364 {
23365 if (old_tree != target_option_current_node)
23366 new_tree = target_option_current_node;
23367 else
23368 new_tree = NULL_TREE;
23369 }
23370 else
23371 {
23372 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23373 if (new_tree == NULL_TREE)
23374 new_tree = target_option_default_node;
23375 }
23376
23377 if (TARGET_DEBUG_TARGET)
23378 {
23379 if (new_tree)
23380 {
23381 fprintf (stderr, "\nnew fndecl target specific options:\n");
23382 debug_tree (new_tree);
23383 }
23384
23385 if (old_tree)
23386 {
23387 fprintf (stderr, "\nold fndecl target specific options:\n");
23388 debug_tree (old_tree);
23389 }
23390
23391 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
23392 fprintf (stderr, "--------------------\n");
23393 }
23394
23395 if (new_tree && old_tree != new_tree)
23396 rs6000_activate_target_options (new_tree);
23397
23398 if (fndecl)
23399 rs6000_previous_fndecl = fndecl;
23400 }
23401
23402 \f
23403 /* Save the current options */
23404
23405 static void
23406 rs6000_function_specific_save (struct cl_target_option *ptr,
23407 struct gcc_options *opts)
23408 {
23409 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23410 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23411 }
23412
23413 /* Restore the current options */
23414
23415 static void
23416 rs6000_function_specific_restore (struct gcc_options *opts,
23417 struct cl_target_option *ptr)
23418 {
23420 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23421 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23422 (void) rs6000_option_override_internal (false);
23423 }
23424
23425 /* Print the current options */
23426
23427 static void
23428 rs6000_function_specific_print (FILE *file, int indent,
23429 struct cl_target_option *ptr)
23430 {
23431 rs6000_print_isa_options (file, indent, "Isa options set",
23432 ptr->x_rs6000_isa_flags);
23433
23434 rs6000_print_isa_options (file, indent, "Isa options explicit",
23435 ptr->x_rs6000_isa_flags_explicit);
23436 }
23437
23438 /* Helper function to print the current isa or misc options on a line. */
23439
23440 static void
23441 rs6000_print_options_internal (FILE *file,
23442 int indent,
23443 const char *string,
23444 HOST_WIDE_INT flags,
23445 const char *prefix,
23446 const struct rs6000_opt_mask *opts,
23447 size_t num_elements)
23448 {
23449 size_t i;
23450 size_t start_column = 0;
23451 size_t cur_column;
23452 size_t max_column = 120;
23453 size_t prefix_len = strlen (prefix);
23454 size_t comma_len = 0;
23455 const char *comma = "";
23456
23457 if (indent)
23458 start_column += fprintf (file, "%*s", indent, "");
23459
23460 if (!flags)
23461 {
23462 fprintf (file, DEBUG_FMT_S, string, "<none>");
23463 return;
23464 }
23465
23466 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
23467
23468 /* Print the various mask options. */
23469 cur_column = start_column;
23470 for (i = 0; i < num_elements; i++)
23471 {
23472 bool invert = opts[i].invert;
23473 const char *name = opts[i].name;
23474 const char *no_str = "";
23475 HOST_WIDE_INT mask = opts[i].mask;
23476 size_t len = comma_len + prefix_len + strlen (name);
23477
23478 if (!invert)
23479 {
23480 if ((flags & mask) == 0)
23481 {
23482 no_str = "no-";
23483 len += sizeof ("no-") - 1;
23484 }
23485
23486 flags &= ~mask;
23487 }
23488
23489 else
23490 {
23491 if ((flags & mask) != 0)
23492 {
23493 no_str = "no-";
23494 len += sizeof ("no-") - 1;
23495 }
23496
23497 flags |= mask;
23498 }
23499
23500 cur_column += len;
23501 if (cur_column > max_column)
23502 {
23503 fprintf (file, ", \\\n%*s", (int)start_column, "");
23504 cur_column = start_column + len;
23505 comma = "";
23506 }
23507
23508 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23509 comma = ", ";
23510 comma_len = sizeof (", ") - 1;
23511 }
23512
23513 fputs ("\n", file);
23514 }
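
/* A hypothetical -mdebug=target line from the printer above (the flag
   value, the option set, and the DEBUG_FMT_* layout all depend on the
   configuration) might look like:

     Isa options set = 0x60004c006: -maltivec, -mcmpb, -mno-crypto, ...

   with over-long lines wrapped at column 120 and continued with a
   backslash.  */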
23515
23516 /* Helper function to print the current isa options on a line. */
23517
23518 static void
23519 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23520 HOST_WIDE_INT flags)
23521 {
23522 rs6000_print_options_internal (file, indent, string, flags, "-m",
23523 &rs6000_opt_masks[0],
23524 ARRAY_SIZE (rs6000_opt_masks));
23525 }
23526
23527 static void
23528 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23529 HOST_WIDE_INT flags)
23530 {
23531 rs6000_print_options_internal (file, indent, string, flags, "",
23532 &rs6000_builtin_mask_names[0],
23533 ARRAY_SIZE (rs6000_builtin_mask_names));
23534 }
23535
23536 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
23537 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23538 -mupper-regs-df, etc.).
23539
23540 If the user used -mno-power8-vector, we need to turn off all of the implicit
23541 ISA 2.07 and 3.0 options that relate to the vector unit.
23542
23543 If the user used -mno-power9-vector, we need to turn off all of the implicit
23544 ISA 3.0 options that relate to the vector unit.
23545
23546 This function does not handle explicit options such as the user specifying
23547 -mdirect-move. These are handled in rs6000_option_override_internal, and
23548 the appropriate error is given if needed.
23549
23550 We return a mask of all of the implicit options that should not be enabled
23551 by default. */
23552
23553 static HOST_WIDE_INT
23554 rs6000_disable_incompatible_switches (void)
23555 {
23556 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
23557 size_t i, j;
23558
23559 static const struct {
23560 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
23561 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
23562 const char *const name; /* name of the switch. */
23563 } flags[] = {
23564 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
23565 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
23566 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
23567 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
23568 };
23569
23570 for (i = 0; i < ARRAY_SIZE (flags); i++)
23571 {
23572 HOST_WIDE_INT no_flag = flags[i].no_flag;
23573
23574 if ((rs6000_isa_flags & no_flag) == 0
23575 && (rs6000_isa_flags_explicit & no_flag) != 0)
23576 {
23577 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
23578 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
23579 & rs6000_isa_flags
23580 & dep_flags);
23581
23582 if (set_flags)
23583 {
23584 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
23585 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
23586 {
23587 set_flags &= ~rs6000_opt_masks[j].mask;
23588 error ("%<-mno-%s%> turns off %<-m%s%>",
23589 flags[i].name,
23590 rs6000_opt_masks[j].name);
23591 }
23592
23593 gcc_assert (!set_flags);
23594 }
23595
23596 rs6000_isa_flags &= ~dep_flags;
23597 ignore_masks |= no_flag | dep_flags;
23598 }
23599 }
23600
23601 return ignore_masks;
23602 }
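
/* As an illustration, "-mpower8-vector -mno-vsx" explicitly enables a
   flag that depends on VSX while explicitly disabling VSX itself, so
   the loop above emits "-mno-vsx turns off -mpower8-vector", clears the
   dependent flags, and returns a mask telling the caller not to
   re-enable any of the VSX-dependent implicit options.  */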
23603
23604 \f
23605 /* Helper function for printing the function name when debugging. */
23606
23607 static const char *
23608 get_decl_name (tree fn)
23609 {
23610 tree name;
23611
23612 if (!fn)
23613 return "<null>";
23614
23615 name = DECL_NAME (fn);
23616 if (!name)
23617 return "<no-name>";
23618
23619 return IDENTIFIER_POINTER (name);
23620 }
23621
23622 /* Return the clone id of the target we are compiling code for in a target
23623 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23624 the priority list for the target clones (ordered from lowest to
23625 highest). */
23626
23627 static int
23628 rs6000_clone_priority (tree fndecl)
23629 {
23630 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23631 HOST_WIDE_INT isa_masks;
23632 int ret = CLONE_DEFAULT;
23633 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
23634 const char *attrs_str = NULL;
23635
23636 attrs = TREE_VALUE (TREE_VALUE (attrs));
23637 attrs_str = TREE_STRING_POINTER (attrs);
23638
23639 /* Return priority zero for default function. Return the ISA needed for the
23640 function if it is not the default. */
23641 if (strcmp (attrs_str, "default") != 0)
23642 {
23643 if (fn_opts == NULL_TREE)
23644 fn_opts = target_option_default_node;
23645
23646 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
23647 isa_masks = rs6000_isa_flags;
23648 else
23649 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
23650
23651 for (ret = CLONE_MAX - 1; ret != 0; ret--)
23652 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
23653 break;
23654 }
23655
23656 if (TARGET_DEBUG_TARGET)
23657 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
23658 get_decl_name (fndecl), ret);
23659
23660 return ret;
23661 }
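
/* For instance (illustrative only), a function declared

     __attribute__ ((target_clones ("cpu=power9", "default")))
     long mod3 (long a, long b);

   produces two clones; the power9 clone carries the ISA 3.0 flags and
   so matches a high entry in rs6000_clone_map, while the "default"
   clone gets priority CLONE_DEFAULT (0).  */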
23662
23663 /* This compares the priority of target features in function DECL1 and DECL2.
23664 It returns positive value if DECL1 is higher priority, negative value if
23665 DECL2 is higher priority and 0 if they are the same. Note, priorities are
23666 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23667
23668 static int
23669 rs6000_compare_version_priority (tree decl1, tree decl2)
23670 {
23671 int priority1 = rs6000_clone_priority (decl1);
23672 int priority2 = rs6000_clone_priority (decl2);
23673 int ret = priority1 - priority2;
23674
23675 if (TARGET_DEBUG_TARGET)
23676 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23677 get_decl_name (decl1), get_decl_name (decl2), ret);
23678
23679 return ret;
23680 }
23681
23682 /* Make a dispatcher declaration for the multi-versioned function DECL.
23683 Calls to DECL function will be replaced with calls to the dispatcher
23684 by the front-end. Returns the decl of the dispatcher function. */
23685
23686 static tree
23687 rs6000_get_function_versions_dispatcher (void *decl)
23688 {
23689 tree fn = (tree) decl;
23690 struct cgraph_node *node = NULL;
23691 struct cgraph_node *default_node = NULL;
23692 struct cgraph_function_version_info *node_v = NULL;
23693 struct cgraph_function_version_info *first_v = NULL;
23694
23695 tree dispatch_decl = NULL;
23696
23697 struct cgraph_function_version_info *default_version_info = NULL;
23698 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23699
23700 if (TARGET_DEBUG_TARGET)
23701 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23702 get_decl_name (fn));
23703
23704 node = cgraph_node::get (fn);
23705 gcc_assert (node != NULL);
23706
23707 node_v = node->function_version ();
23708 gcc_assert (node_v != NULL);
23709
23710 if (node_v->dispatcher_resolver != NULL)
23711 return node_v->dispatcher_resolver;
23712
23713 /* Find the default version and make it the first node. */
23714 first_v = node_v;
23715 /* Go to the beginning of the chain. */
23716 while (first_v->prev != NULL)
23717 first_v = first_v->prev;
23718
23719 default_version_info = first_v;
23720 while (default_version_info != NULL)
23721 {
23722 const tree decl2 = default_version_info->this_node->decl;
23723 if (is_function_default_version (decl2))
23724 break;
23725 default_version_info = default_version_info->next;
23726 }
23727
23728 /* If there is no default node, just return NULL. */
23729 if (default_version_info == NULL)
23730 return NULL;
23731
23732 /* Make default info the first node. */
23733 if (first_v != default_version_info)
23734 {
23735 default_version_info->prev->next = default_version_info->next;
23736 if (default_version_info->next)
23737 default_version_info->next->prev = default_version_info->prev;
23738 first_v->prev = default_version_info;
23739 default_version_info->next = first_v;
23740 default_version_info->prev = NULL;
23741 }
23742
23743 default_node = default_version_info->this_node;
23744
23745 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23746 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23747 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23748 "exports hardware capability bits");
23749 #else
23750
23751 if (targetm.has_ifunc_p ())
23752 {
23753 struct cgraph_function_version_info *it_v = NULL;
23754 struct cgraph_node *dispatcher_node = NULL;
23755 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23756
23757 /* Right now, the dispatching is done via ifunc. */
23758 dispatch_decl = make_dispatcher_decl (default_node->decl);
23759
23760 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23761 gcc_assert (dispatcher_node != NULL);
23762 dispatcher_node->dispatcher_function = 1;
23763 dispatcher_version_info
23764 = dispatcher_node->insert_new_function_version ();
23765 dispatcher_version_info->next = default_version_info;
23766 dispatcher_node->definition = 1;
23767
23768 /* Set the dispatcher for all the versions. */
23769 it_v = default_version_info;
23770 while (it_v != NULL)
23771 {
23772 it_v->dispatcher_resolver = dispatch_decl;
23773 it_v = it_v->next;
23774 }
23775 }
23776 else
23777 {
23778 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23779 "multiversioning needs ifunc which is not supported "
23780 "on this target");
23781 }
23782 #endif
23783
23784 return dispatch_decl;
23785 }
23786
23787 /* Make the resolver function decl to dispatch the versions of a multi-
23788 versioned function, DEFAULT_DECL. Create an empty basic block in the
23789 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23790 function. */
23791
23792 static tree
23793 make_resolver_func (const tree default_decl,
23794 const tree dispatch_decl,
23795 basic_block *empty_bb)
23796 {
23797 /* Make the resolver function static. The resolver function returns
23798 void *. */
23799 tree decl_name = clone_function_name (default_decl, "resolver");
23800 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23801 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23802 tree decl = build_fn_decl (resolver_name, type);
23803 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
23804
23805 DECL_NAME (decl) = decl_name;
23806 TREE_USED (decl) = 1;
23807 DECL_ARTIFICIAL (decl) = 1;
23808 DECL_IGNORED_P (decl) = 0;
23809 TREE_PUBLIC (decl) = 0;
23810 DECL_UNINLINABLE (decl) = 1;
23811
23812 /* Resolver is not external, body is generated. */
23813 DECL_EXTERNAL (decl) = 0;
23814 DECL_EXTERNAL (dispatch_decl) = 0;
23815
23816 DECL_CONTEXT (decl) = NULL_TREE;
23817 DECL_INITIAL (decl) = make_node (BLOCK);
23818 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23819
23820 /* Build result decl and add to function_decl. */
23821 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23822 DECL_CONTEXT (t) = decl;
23823 DECL_ARTIFICIAL (t) = 1;
23824 DECL_IGNORED_P (t) = 1;
23825 DECL_RESULT (decl) = t;
23826
23827 gimplify_function_tree (decl);
23828 push_cfun (DECL_STRUCT_FUNCTION (decl));
23829 *empty_bb = init_lowered_empty_function (decl, false,
23830 profile_count::uninitialized ());
23831
23832 cgraph_node::add_new_function (decl, true);
23833 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23834
23835 pop_cfun ();
23836
23837 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23838 DECL_ATTRIBUTES (dispatch_decl)
23839 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
23840
23841 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23842
23843 return decl;
23844 }
23845
23846 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23847 return a pointer to VERSION_DECL if we are running on a machine that
23848 supports the hardware architecture bits indexed by CLONE_ISA. This function will
23849 be called during version dispatch to decide which function version to
23850 execute. It returns the basic block at the end, to which more conditions
23851 can be added. */
23852
23853 static basic_block
23854 add_condition_to_bb (tree function_decl, tree version_decl,
23855 int clone_isa, basic_block new_bb)
23856 {
23857 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
23858
23859 gcc_assert (new_bb != NULL);
23860 gimple_seq gseq = bb_seq (new_bb);
23861
23863 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
23864 build_fold_addr_expr (version_decl));
23865 tree result_var = create_tmp_var (ptr_type_node);
23866 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
23867 gimple *return_stmt = gimple_build_return (result_var);
23868
23869 if (clone_isa == CLONE_DEFAULT)
23870 {
23871 gimple_seq_add_stmt (&gseq, convert_stmt);
23872 gimple_seq_add_stmt (&gseq, return_stmt);
23873 set_bb_seq (new_bb, gseq);
23874 gimple_set_bb (convert_stmt, new_bb);
23875 gimple_set_bb (return_stmt, new_bb);
23876 pop_cfun ();
23877 return new_bb;
23878 }
23879
23880 tree bool_zero = build_int_cst (bool_int_type_node, 0);
23881 tree cond_var = create_tmp_var (bool_int_type_node);
23882 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
23883 const char *arg_str = rs6000_clone_map[clone_isa].name;
23884 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
23885 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
23886 gimple_call_set_lhs (call_cond_stmt, cond_var);
23887
23888 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
23889 gimple_set_bb (call_cond_stmt, new_bb);
23890 gimple_seq_add_stmt (&gseq, call_cond_stmt);
23891
23892 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
23893 NULL_TREE, NULL_TREE);
23894 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
23895 gimple_set_bb (if_else_stmt, new_bb);
23896 gimple_seq_add_stmt (&gseq, if_else_stmt);
23897
23898 gimple_seq_add_stmt (&gseq, convert_stmt);
23899 gimple_seq_add_stmt (&gseq, return_stmt);
23900 set_bb_seq (new_bb, gseq);
23901
23902 basic_block bb1 = new_bb;
23903 edge e12 = split_block (bb1, if_else_stmt);
23904 basic_block bb2 = e12->dest;
23905 e12->flags &= ~EDGE_FALLTHRU;
23906 e12->flags |= EDGE_TRUE_VALUE;
23907
23908 edge e23 = split_block (bb2, return_stmt);
23909 gimple_set_bb (convert_stmt, bb2);
23910 gimple_set_bb (return_stmt, bb2);
23911
23912 basic_block bb3 = e23->dest;
23913 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
23914
23915 remove_edge (e23);
23916 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
23917
23918 pop_cfun ();
23919 return bb3;
23920 }
23921
23922 /* This function generates the dispatch function for multi-versioned functions.
23923 DISPATCH_DECL is the function which will contain the dispatch logic.
23924 FNDECLS are the function choices for dispatch, and is a tree chain.
23925 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
23926 code is generated. */
23927
23928 static int
23929 dispatch_function_versions (tree dispatch_decl,
23930 void *fndecls_p,
23931 basic_block *empty_bb)
23932 {
23933 int ix;
23934 tree ele;
23935 vec<tree> *fndecls;
23936 tree clones[CLONE_MAX];
23937
23938 if (TARGET_DEBUG_TARGET)
23939 fputs ("dispatch_function_versions, top\n", stderr);
23940
23941 gcc_assert (dispatch_decl != NULL
23942 && fndecls_p != NULL
23943 && empty_bb != NULL);
23944
23945 /* fndecls_p is actually a vector. */
23946 fndecls = static_cast<vec<tree> *> (fndecls_p);
23947
23948 /* At least one more version other than the default. */
23949 gcc_assert (fndecls->length () >= 2);
23950
23951 /* The first version in the vector is the default decl. */
23952 memset ((void *) clones, '\0', sizeof (clones));
23953 clones[CLONE_DEFAULT] = (*fndecls)[0];
23954
23955 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
23956 here (on x86_64 it is not a NOP). The builtin function
23957 __builtin_cpu_supports ensures that the TOC fields are set up by requiring
23958 a recent glibc. If we ever need to call __builtin_cpu_init, this is where
23959 the code to do the call would be inserted. */
23960
23961 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
23962 {
23963 int priority = rs6000_clone_priority (ele);
23964 if (!clones[priority])
23965 clones[priority] = ele;
23966 }
23967
23968 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
23969 if (clones[ix])
23970 {
23971 if (TARGET_DEBUG_TARGET)
23972 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
23973 ix, get_decl_name (clones[ix]));
23974
23975 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
23976 *empty_bb);
23977 }
23978
23979 return 0;
23980 }
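
/* Conceptually, the dispatch code built above behaves like this C
   sketch (assuming the ISA 3.0 entry of rs6000_clone_map uses the
   "arch_3_00" hwcap string; the real body is emitted as GIMPLE, with
   clones tested from highest priority down):

     void *mod3_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
         return mod3_power9;
       return mod3_default;
     }  */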
23981
23982 /* Generate the dispatching code body to dispatch multi-versioned function
23983 DECL. The target hook is called to process the "target" attributes and
23984 provide the code to dispatch the right function at run-time. NODE points
23985 to the dispatcher decl whose body will be created. */
23986
23987 static tree
23988 rs6000_generate_version_dispatcher_body (void *node_p)
23989 {
23990 tree resolver;
23991 basic_block empty_bb;
23992 struct cgraph_node *node = (cgraph_node *) node_p;
23993 struct cgraph_function_version_info *ninfo = node->function_version ();
23994
23995 if (ninfo->dispatcher_resolver)
23996 return ninfo->dispatcher_resolver;
23997
23998 /* node is going to be an alias, so remove the finalized bit. */
23999 node->definition = false;
24000
24001 /* The first version in the chain corresponds to the default version. */
24002 ninfo->dispatcher_resolver = resolver
24003 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24004
24005 if (TARGET_DEBUG_TARGET)
24006 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
24007 get_decl_name (resolver));
24008
24009 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24010 auto_vec<tree, 2> fn_ver_vec;
24011
24012 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24013 vinfo;
24014 vinfo = vinfo->next)
24015 {
24016 struct cgraph_node *version = vinfo->this_node;
24017 /* Check for virtual functions here again, as by this time it should
24018 have been determined if this function needs a vtable index or
24019 not. This happens for methods in derived classes that override
24020 virtual methods in base classes but are not explicitly marked as
24021 virtual. */
24022 if (DECL_VINDEX (version->decl))
24023 sorry ("Virtual function multiversioning not supported");
24024
24025 fn_ver_vec.safe_push (version->decl);
24026 }
24027
24028 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24029 cgraph_edge::rebuild_edges ();
24030 pop_cfun ();
24031 return resolver;
24032 }
24033
24034 \f
24035 /* Hook to determine if one function can safely inline another. */
24036
24037 static bool
24038 rs6000_can_inline_p (tree caller, tree callee)
24039 {
24040 bool ret = false;
24041 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24042 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24043
24044 /* If the callee has no option attributes, then it is ok to inline. */
24045 if (!callee_tree)
24046 ret = true;
24047
24048 else
24049 {
24050 HOST_WIDE_INT caller_isa;
24051 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24052 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24053 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24054
24055 /* If the caller has option attributes, then use them.
24056 Otherwise, use the command line options. */
24057 if (caller_tree)
24058 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24059 else
24060 caller_isa = rs6000_isa_flags;
24061
24062 /* The callee's options must be a subset of the caller's options, i.e.
24063 a vsx function may inline an altivec function, but a no-vsx function
24064 must not inline a vsx function. However, for those options that the
24065 callee has explicitly enabled or disabled, then we must enforce that
24066 the callee's and caller's options match exactly; see PR70010. */
24067 if (((caller_isa & callee_isa) == callee_isa)
24068 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24069 ret = true;
24070 }
24071
24072 if (TARGET_DEBUG_TARGET)
24073 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
24074 get_decl_name (caller), get_decl_name (callee),
24075 (ret ? "can" : "cannot"));
24076
24077 return ret;
24078 }
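
/* Example of the subset rule (illustrative): a caller compiled with
   -mvsx may inline a callee built with only -maltivec, since the
   callee's flags are a subset of the caller's; but if the callee was
   compiled with an explicit -mno-power9-vector, the caller must match
   that particular bit exactly or inlining is refused (PR70010).  */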
24079 \f
24080 /* Allocate a stack temp and fixup the address so it meets the particular
24081 memory requirements (either offsettable or REG+REG addressing). */
24082
24083 rtx
24084 rs6000_allocate_stack_temp (machine_mode mode,
24085 bool offsettable_p,
24086 bool reg_reg_p)
24087 {
24088 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24089 rtx addr = XEXP (stack, 0);
24090 int strict_p = reload_completed;
24091
24092 if (!legitimate_indirect_address_p (addr, strict_p))
24093 {
24094 if (offsettable_p
24095 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24096 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24097
24098 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24099 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24100 }
24101
24102 return stack;
24103 }
24104
24105 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24106 convert to such a form to deal with memory reference instructions
24107 like STFIWX and LDBRX that only take reg+reg addressing. */
24108
24109 rtx
24110 rs6000_force_indexed_or_indirect_mem (rtx x)
24111 {
24112 machine_mode mode = GET_MODE (x);
24113
24114 gcc_assert (MEM_P (x));
24115 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24116 {
24117 rtx addr = XEXP (x, 0);
24118 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24119 {
24120 rtx reg = XEXP (addr, 0);
24121 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24122 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24123 gcc_assert (REG_P (reg));
24124 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24125 addr = reg;
24126 }
24127 else if (GET_CODE (addr) == PRE_MODIFY)
24128 {
24129 rtx reg = XEXP (addr, 0);
24130 rtx expr = XEXP (addr, 1);
24131 gcc_assert (REG_P (reg));
24132 gcc_assert (GET_CODE (expr) == PLUS);
24133 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24134 addr = reg;
24135 }
24136
24137 if (GET_CODE (addr) == PLUS)
24138 {
24139 rtx op0 = XEXP (addr, 0);
24140 rtx op1 = XEXP (addr, 1);
24141 op0 = force_reg (Pmode, op0);
24142 op1 = force_reg (Pmode, op1);
24143 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24144 }
24145 else
24146 x = replace_equiv_address (x, force_reg (Pmode, addr));
24147 }
24148
24149 return x;
24150 }
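
/* A sketch of the auto-increment rewrite above: an 8-byte access via

     (mem:DI (pre_inc (reg:DI 9)))

   becomes an explicit "addi 9,9,8" followed by (mem:DI (reg:DI 9)),
   an indirect form that reg+reg instructions like LDBRX accept.  */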
24151
24152 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24153
24154 On the RS/6000, all integer constants are acceptable, most won't be valid
24155 for particular insns, though. Only easy FP constants are acceptable. */
24156
24157 static bool
24158 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24159 {
24160 if (TARGET_ELF && tls_referenced_p (x))
24161 return false;
24162
24163 if (CONST_DOUBLE_P (x))
24164 return easy_fp_constant (x, mode);
24165
24166 if (GET_CODE (x) == CONST_VECTOR)
24167 return easy_vector_constant (x, mode);
24168
24169 return true;
24170 }
24171
24172 \f
24173 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24174
24175 static bool
24176 chain_already_loaded (rtx_insn *last)
24177 {
24178 for (; last != NULL; last = PREV_INSN (last))
24179 {
24180 if (NONJUMP_INSN_P (last))
24181 {
24182 rtx patt = PATTERN (last);
24183
24184 if (GET_CODE (patt) == SET)
24185 {
24186 rtx lhs = XEXP (patt, 0);
24187
24188 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24189 return true;
24190 }
24191 }
24192 }
24193 return false;
24194 }
24195
24196 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
24197
24198 void
24199 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24200 {
24201 rtx func = func_desc;
24202 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24203 rtx toc_load = NULL_RTX;
24204 rtx toc_restore = NULL_RTX;
24205 rtx func_addr;
24206 rtx abi_reg = NULL_RTX;
24207 rtx call[4];
24208 int n_call;
24209 rtx insn;
24210 bool is_pltseq_longcall;
24211
24212 if (global_tlsarg)
24213 tlsarg = global_tlsarg;
24214
24215 /* Handle longcall attributes. */
24216 is_pltseq_longcall = false;
24217 if ((INTVAL (cookie) & CALL_LONG) != 0
24218 && GET_CODE (func_desc) == SYMBOL_REF)
24219 {
24220 func = rs6000_longcall_ref (func_desc, tlsarg);
24221 if (TARGET_PLTSEQ)
24222 is_pltseq_longcall = true;
24223 }
24224
24225 /* Handle indirect calls. */
24226 if (!SYMBOL_REF_P (func)
24227 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24228 {
24229 if (!rs6000_pcrel_p (cfun))
24230 {
24231 /* Save the TOC into its reserved slot before the call,
24232 and prepare to restore it after the call. */
24233 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24234 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24235 gen_rtvec (1, stack_toc_offset),
24236 UNSPEC_TOCSLOT);
24237 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24238
24239 /* Can we optimize saving the TOC in the prologue or
24240 do we need to do it at every call? */
24241 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24242 cfun->machine->save_toc_in_prologue = true;
24243 else
24244 {
24245 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24246 rtx stack_toc_mem = gen_frame_mem (Pmode,
24247 gen_rtx_PLUS (Pmode, stack_ptr,
24248 stack_toc_offset));
24249 MEM_VOLATILE_P (stack_toc_mem) = 1;
24250 if (is_pltseq_longcall)
24251 {
24252 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24253 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24254 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24255 }
24256 else
24257 emit_move_insn (stack_toc_mem, toc_reg);
24258 }
24259 }
24260
24261 if (DEFAULT_ABI == ABI_ELFv2)
24262 {
24263 /* A function pointer in the ELFv2 ABI is just a plain address, but
24264 the ABI requires it to be loaded into r12 before the call. */
24265 func_addr = gen_rtx_REG (Pmode, 12);
24266 if (!rtx_equal_p (func_addr, func))
24267 emit_move_insn (func_addr, func);
24268 abi_reg = func_addr;
24269 /* Indirect calls via CTR are strongly preferred over indirect
24270 calls via LR, so move the address there. Needed to mark
24271 this insn for linker plt sequence editing too. */
24272 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24273 if (is_pltseq_longcall)
24274 {
24275 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24276 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24277 emit_insn (gen_rtx_SET (func_addr, mark_func));
24278 v = gen_rtvec (2, func_addr, func_desc);
24279 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24280 }
24281 else
24282 emit_move_insn (func_addr, abi_reg);
24283 }
24284 else
24285 {
24286 /* A function pointer under AIX is a pointer to a data area whose
24287 first word contains the actual address of the function, whose
24288 second word contains a pointer to its TOC, and whose third word
24289 contains a value to place in the static chain register (r11).
24290 Note that if we load the static chain, our "trampoline" need
24291 not have any executable code. */
24292
24293 /* Load up address of the actual function. */
24294 func = force_reg (Pmode, func);
24295 func_addr = gen_reg_rtx (Pmode);
24296 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24297
24298 /* Indirect calls via CTR are strongly preferred over indirect
24299 calls via LR, so move the address there. */
24300 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24301 emit_move_insn (ctr_reg, func_addr);
24302 func_addr = ctr_reg;
24303
24304 /* Prepare to load the TOC of the called function. Note that the
24305 TOC load must happen immediately before the actual call so
24306 that unwinding the TOC registers works correctly. See the
24307 comment in frob_update_context. */
24308 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24309 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24310 gen_rtx_PLUS (Pmode, func,
24311 func_toc_offset));
24312 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24313
24314 /* If we have a static chain, load it up. But, if the call was
24315 originally direct, the 3rd word has not been written since no
24316 trampoline has been built, so we ought not to load it, lest we
24317 override a static chain value. */
24318 if (!(GET_CODE (func_desc) == SYMBOL_REF
24319 && SYMBOL_REF_FUNCTION_P (func_desc))
24320 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24321 && !chain_already_loaded (get_current_sequence ()->next->last))
24322 {
24323 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24324 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24325 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24326 gen_rtx_PLUS (Pmode, func,
24327 func_sc_offset));
24328 emit_move_insn (sc_reg, func_sc_mem);
24329 abi_reg = sc_reg;
24330 }
24331 }
24332 }
24333 else
24334 {
24335 /* No TOC register needed for calls from PC-relative callers. */
24336 if (!rs6000_pcrel_p (cfun))
24337 /* Direct calls use the TOC: for local calls, the callee will
24338 assume the TOC register is set; for non-local calls, the
24339 PLT stub needs the TOC register. */
24340 abi_reg = toc_reg;
24341 func_addr = func;
24342 }
24343
24344 /* Create the call. */
24345 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24346 if (value != NULL_RTX)
24347 call[0] = gen_rtx_SET (value, call[0]);
24348 n_call = 1;
24349
24350 if (toc_load)
24351 call[n_call++] = toc_load;
24352 if (toc_restore)
24353 call[n_call++] = toc_restore;
24354
24355 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24356
24357 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24358 insn = emit_call_insn (insn);
24359
24360 /* Mention all registers defined by the ABI to hold information
24361 as uses in CALL_INSN_FUNCTION_USAGE. */
24362 if (abi_reg)
24363 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24364 }
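
/* For reference, a 64-bit AIX-style indirect call produced here looks
   roughly like the following (register numbers and the TOC save offset
   are illustrative):

     std 2,40(1)       save our TOC in its reserved stack slot
     ld 12,0(9)        descriptor word 1: the function address
     mtctr 12          indirect calls go via CTR
     ld 11,16(9)       descriptor word 3: static chain, when loaded
     ld 2,8(9)         descriptor word 2: callee TOC, immediately
     bctrl             before the call so TOC unwinding works
     ld 2,40(1)        toc_restore: reload our TOC after the call  */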
24365
24366 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24367
24368 void
24369 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24370 {
24371 rtx call[2];
24372 rtx insn;
24373
24374 gcc_assert (INTVAL (cookie) == 0);
24375
24376 if (global_tlsarg)
24377 tlsarg = global_tlsarg;
24378
24379 /* Create the call. */
24380 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24381 if (value != NULL_RTX)
24382 call[0] = gen_rtx_SET (value, call[0]);
24383
24384 call[1] = simple_return_rtx;
24385
24386 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24387 insn = emit_call_insn (insn);
24388
24389 /* Note use of the TOC register. */
24390 if (!rs6000_pcrel_p (cfun))
24391 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24392 gen_rtx_REG (Pmode, TOC_REGNUM));
24393 }
24394
24395 /* Expand code to perform a call under the SYSV4 ABI. */
24396
24397 void
24398 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24399 {
24400 rtx func = func_desc;
24401 rtx func_addr;
24402 rtx call[4];
24403 rtx insn;
24404 rtx abi_reg = NULL_RTX;
24405 int n;
24406
24407 if (global_tlsarg)
24408 tlsarg = global_tlsarg;
24409
24410 /* Handle longcall attributes. */
24411 if ((INTVAL (cookie) & CALL_LONG) != 0
24412 && GET_CODE (func_desc) == SYMBOL_REF)
24413 {
24414 func = rs6000_longcall_ref (func_desc, tlsarg);
24415 /* If the longcall was implemented as an inline PLT call using
24416 PLT unspecs then func will be REG:r11. If not, func will be
24417 a pseudo reg. The inline PLT call sequence supports lazy
24418 linking (and longcalls to functions in dlopen'd libraries).
24419 The other style of longcalls don't. The lazy linking entry
24420 to the dynamic symbol resolver requires r11 be the function
24421 address (as it is for linker generated PLT stubs). Ensure
24422 r11 stays valid to the bctrl by marking r11 used by the call. */
24423 if (TARGET_PLTSEQ)
24424 abi_reg = func;
24425 }
24426
24427 /* Handle indirect calls. */
24428 if (GET_CODE (func) != SYMBOL_REF)
24429 {
24430 func = force_reg (Pmode, func);
24431
24432 /* Indirect calls via CTR are strongly preferred over indirect
24433 calls via LR, so move the address there. That can't be left
24434 to reload because we want to mark every instruction in an
24435 inline PLT call sequence with a reloc, enabling the linker to
24436 edit the sequence back to a direct call when that makes sense. */
24437 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24438 if (abi_reg)
24439 {
24440 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24441 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24442 emit_insn (gen_rtx_SET (func_addr, mark_func));
24443 v = gen_rtvec (2, func_addr, func_desc);
24444 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24445 }
24446 else
24447 emit_move_insn (func_addr, func);
24448 }
24449 else
24450 func_addr = func;
24451
24452 /* Create the call. */
24453 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24454 if (value != NULL_RTX)
24455 call[0] = gen_rtx_SET (value, call[0]);
24456
24457 call[1] = gen_rtx_USE (VOIDmode, cookie);
24458 n = 2;
24459 if (TARGET_SECURE_PLT
24460 && flag_pic
24461 && GET_CODE (func_addr) == SYMBOL_REF
24462 && !SYMBOL_REF_LOCAL_P (func_addr))
24463 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24464
24465 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24466
24467 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24468 insn = emit_call_insn (insn);
24469 if (abi_reg)
24470 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24471 }
24472
24473 /* Expand code to perform a sibling call under the SysV4 ABI. */
24474
24475 void
24476 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24477 {
24478 rtx func = func_desc;
24479 rtx func_addr;
24480 rtx call[3];
24481 rtx insn;
24482 rtx abi_reg = NULL_RTX;
24483
24484 if (global_tlsarg)
24485 tlsarg = global_tlsarg;
24486
24487 /* Handle longcall attributes. */
24488 if ((INTVAL (cookie) & CALL_LONG) != 0
24489 && GET_CODE (func_desc) == SYMBOL_REF)
24490 {
24491 func = rs6000_longcall_ref (func_desc, tlsarg);
24492 /* If the longcall was implemented as an inline PLT call using
24493 PLT unspecs then func will be REG:r11. If not, func will be
24494 a pseudo reg. The inline PLT call sequence supports lazy
24495 linking (and longcalls to functions in dlopen'd libraries).
24496 The other style of longcall doesn't. The lazy linking entry
24497 to the dynamic symbol resolver requires r11 be the function
24498 address (as it is for linker generated PLT stubs). Ensure
24499 r11 stays valid to the bctr by marking r11 used by the call. */
24500 if (TARGET_PLTSEQ)
24501 abi_reg = func;
24502 }
24503
24504 /* Handle indirect calls. */
24505 if (GET_CODE (func) != SYMBOL_REF)
24506 {
24507 func = force_reg (Pmode, func);
24508
24509 /* Indirect sibcalls must go via CTR. That can't be left to
24510 reload because we want to mark every instruction in an inline
24511 PLT call sequence with a reloc, enabling the linker to edit
24512 the sequence back to a direct call when that makes sense. */
24513 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24514 if (abi_reg)
24515 {
24516 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24517 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24518 emit_insn (gen_rtx_SET (func_addr, mark_func));
24519 v = gen_rtvec (2, func_addr, func_desc);
24520 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24521 }
24522 else
24523 emit_move_insn (func_addr, func);
24524 }
24525 else
24526 func_addr = func;
24527
24528 /* Create the call. */
24529 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24530 if (value != NULL_RTX)
24531 call[0] = gen_rtx_SET (value, call[0]);
24532
24533 call[1] = gen_rtx_USE (VOIDmode, cookie);
24534 call[2] = simple_return_rtx;
24535
24536 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24537 insn = emit_call_insn (insn);
24538 if (abi_reg)
24539 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24540 }
24541
24542 #if TARGET_MACHO
24543
24544 /* Expand code to perform a call under the Darwin ABI.
24545 Modulo handling of mlongcall, this is much the same as sysv.
24546 If/when the longcall optimisation is removed, we could drop this
24547 code and use the sysv case (taking care to avoid the tls stuff).
24548
24549 We can use this for sibcalls too, if needed. */
24550
24551 void
24552 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
24553 rtx cookie, bool sibcall)
24554 {
24555 rtx func = func_desc;
24556 rtx func_addr;
24557 rtx call[3];
24558 rtx insn;
24559 int cookie_val = INTVAL (cookie);
24560 bool make_island = false;
24561
24562 /* Handle longcall attributes; there are two cases for Darwin:
24563 1) Newer linkers are capable of synthesising any branch islands needed.
24564 2) We need a helper branch island synthesised by the compiler.
24565 The second case has mostly been retired and we don't use it for m64.
24566 In fact, it is only an optimisation; we could just indirect as sysv
24567 does, but we keep it for backwards compatibility for now.
24568 If we're going to use this, then we need to keep the CALL_LONG bit set,
24569 so that we can pick up the special insn form later. */
24570 if ((cookie_val & CALL_LONG) != 0
24571 && GET_CODE (func_desc) == SYMBOL_REF)
24572 {
24573 /* FIXME: the longcall opt should not hang off this flag; it is most
24574 likely incorrect for kernel-mode code-generation. */
24575 if (darwin_symbol_stubs && TARGET_32BIT)
24576 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
24577 else
24578 {
24579 /* The linker is capable of doing this, but the user explicitly
24580 asked for -mlongcall, so we'll do the 'normal' version. */
24581 func = rs6000_longcall_ref (func_desc, NULL_RTX);
24582 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
24583 }
24584 }
24585
24586 /* Handle indirect calls. */
24587 if (GET_CODE (func) != SYMBOL_REF)
24588 {
24589 func = force_reg (Pmode, func);
24590
24591 /* Indirect calls via CTR are strongly preferred over indirect
24592 calls via LR, and are required for indirect sibcalls, so move
24593 the address there. */
24594 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24595 emit_move_insn (func_addr, func);
24596 }
24597 else
24598 func_addr = func;
24599
24600 /* Create the call. */
24601 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24602 if (value != NULL_RTX)
24603 call[0] = gen_rtx_SET (value, call[0]);
24604
24605 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
24606
24607 if (sibcall)
24608 call[2] = simple_return_rtx;
24609 else
24610 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24611
24612 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24613 insn = emit_call_insn (insn);
24614 /* Now we have the debug info in the insn, we can set up the branch island
24615 if we're using one. */
24616 if (make_island)
24617 {
24618 tree funname = get_identifier (XSTR (func_desc, 0));
24619
24620 if (no_previous_def (funname))
24621 {
24622 rtx label_rtx = gen_label_rtx ();
24623 char *label_buf, temp_buf[256];
24624 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
24625 CODE_LABEL_NUMBER (label_rtx));
24626 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
24627 tree labelname = get_identifier (label_buf);
24628 add_compiler_branch_island (labelname, funname,
24629 insn_line ((const rtx_insn*)insn));
24630 }
24631 }
24632 }
24633 #endif
24634
24635 void
24636 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24637 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24638 {
24639 #if TARGET_MACHO
24640 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24641 #else
24642 gcc_unreachable ();
24643 #endif
24644 }
24645
24646
24647 void
24648 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24649 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24650 {
24651 #if TARGET_MACHO
24652 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24653 #else
24654 gcc_unreachable ();
24655 #endif
24656 }
24657
24658 /* Return whether we should generate PC-relative code for FNDECL. */
24659 bool
24660 rs6000_fndecl_pcrel_p (const_tree fndecl)
24661 {
24662 if (DEFAULT_ABI != ABI_ELFv2)
24663 return false;
24664
24665 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24666
24667 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24668 && TARGET_CMODEL == CMODEL_MEDIUM);
24669 }
24670
24671 /* Return whether we should generate PC-relative code for *FN. */
24672 bool
24673 rs6000_pcrel_p (struct function *fn)
24674 {
24675 if (DEFAULT_ABI != ABI_ELFv2)
24676 return false;
24677
24678 /* Optimize usual case. */
24679 if (fn == cfun)
24680 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24681 && TARGET_CMODEL == CMODEL_MEDIUM);
24682
24683 return rs6000_fndecl_pcrel_p (fn->decl);
24684 }
24685
24686 \f
24687 /* Given an address (ADDR), a mode (MODE), and what the format of the
24688 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
24689 for the address. */
24690
24691 enum insn_form
24692 address_to_insn_form (rtx addr,
24693 machine_mode mode,
24694 enum non_prefixed_form non_prefixed_format)
24695 {
24696 /* Single register is easy. */
24697 if (REG_P (addr) || SUBREG_P (addr))
24698 return INSN_FORM_BASE_REG;
24699
24700 /* If the non-prefixed instruction format doesn't support offset addressing,
24701 make sure only indexed addressing is allowed.
24702
24703 We special case SDmode so that the register allocator does not try to move
24704 SDmode through GPR registers, but instead uses the 32-bit integer load and
24705 store instructions for the floating point registers. */
24706 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
24707 {
24708 if (GET_CODE (addr) != PLUS)
24709 return INSN_FORM_BAD;
24710
24711 rtx op0 = XEXP (addr, 0);
24712 rtx op1 = XEXP (addr, 1);
24713 if (!REG_P (op0) && !SUBREG_P (op0))
24714 return INSN_FORM_BAD;
24715
24716 if (!REG_P (op1) && !SUBREG_P (op1))
24717 return INSN_FORM_BAD;
24718
24719 return INSN_FORM_X;
24720 }
24721
24722 /* Deal with update forms. */
24723 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
24724 return INSN_FORM_UPDATE;
24725
24726 /* Handle PC-relative symbols and labels. Check for both local and external
24727 symbols. Assume labels are always local. */
24728 if (TARGET_PCREL)
24729 {
24730 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_LOCAL_P (addr))
24731 return INSN_FORM_PCREL_EXTERNAL;
24732
24733 if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
24734 return INSN_FORM_PCREL_LOCAL;
24735 }
24736
24737 if (GET_CODE (addr) == CONST)
24738 addr = XEXP (addr, 0);
24739
24740 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
24741 if (GET_CODE (addr) == LO_SUM)
24742 return INSN_FORM_LO_SUM;
24743
24744 /* Everything below must be an offset address of some form. */
24745 if (GET_CODE (addr) != PLUS)
24746 return INSN_FORM_BAD;
24747
24748 rtx op0 = XEXP (addr, 0);
24749 rtx op1 = XEXP (addr, 1);
24750
24751 /* Check for indexed addresses. */
24752 if (REG_P (op1) || SUBREG_P (op1))
24753 {
24754 if (REG_P (op0) || SUBREG_P (op0))
24755 return INSN_FORM_X;
24756
24757 return INSN_FORM_BAD;
24758 }
24759
24760 if (!CONST_INT_P (op1))
24761 return INSN_FORM_BAD;
24762
24763 HOST_WIDE_INT offset = INTVAL (op1);
24764 if (!SIGNED_INTEGER_34BIT_P (offset))
24765 return INSN_FORM_BAD;
24766
24767 /* Check for local and external PC-relative addresses. Labels are always
24768 local. */
24769 if (TARGET_PCREL)
24770 {
24771 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_LOCAL_P (op0))
24772 return INSN_FORM_PCREL_EXTERNAL;
24773
24774 if (SYMBOL_REF_P (op0) || LABEL_REF_P (op0))
24775 return INSN_FORM_PCREL_LOCAL;
24776 }
24777
24778 /* If it isn't PC-relative, the address must use a base register. */
24779 if (!REG_P (op0) && !SUBREG_P (op0))
24780 return INSN_FORM_BAD;
24781
24782 /* Large offsets must be prefixed. */
24783 if (!SIGNED_INTEGER_16BIT_P (offset))
24784 {
24785 if (TARGET_PREFIXED_ADDR)
24786 return INSN_FORM_PREFIXED_NUMERIC;
24787
24788 return INSN_FORM_BAD;
24789 }
24790
24791 /* We have a 16-bit offset, see what default instruction format to use. */
24792 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
24793 {
24794 unsigned size = GET_MODE_SIZE (mode);
24795
24796 /* On 64-bit systems, assume 64-bit integers need to use DS form
24797 addresses (for LD/STD). VSX vectors need to use DQ form addresses
24798 (for LXV and STXV). TImode is problematic in that its normal usage
24799 is expected to be GPRs where it wants a DS instruction format, but if
24800 it goes into the vector registers, it wants a DQ instruction
24801 format. */
24802 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
24803 non_prefixed_format = NON_PREFIXED_DS;
24804
24805 else if (TARGET_VSX && size >= 16
24806 && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
24807 non_prefixed_format = NON_PREFIXED_DQ;
24808
24809 else
24810 non_prefixed_format = NON_PREFIXED_D;
24811 }
24812
24813 /* Classify the D/DS/DQ-form addresses. */
24814 switch (non_prefixed_format)
24815 {
24816 /* Instruction format D, all 16 bits are valid. */
24817 case NON_PREFIXED_D:
24818 return INSN_FORM_D;
24819
24820 /* Instruction format DS, bottom 2 bits must be 0. */
24821 case NON_PREFIXED_DS:
24822 if ((offset & 3) == 0)
24823 return INSN_FORM_DS;
24824
24825 else if (TARGET_PREFIXED_ADDR)
24826 return INSN_FORM_PREFIXED_NUMERIC;
24827
24828 else
24829 return INSN_FORM_BAD;
24830
24831 /* Instruction format DQ, bottom 4 bits must be 0. */
24832 case NON_PREFIXED_DQ:
24833 if ((offset & 15) == 0)
24834 return INSN_FORM_DQ;
24835
24836 else if (TARGET_PREFIXED_ADDR)
24837 return INSN_FORM_PREFIXED_NUMERIC;
24838
24839 else
24840 return INSN_FORM_BAD;
24841
24842 default:
24843 break;
24844 }
24845
24846 return INSN_FORM_BAD;
24847 }
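
/* Some illustrative classifications (assuming a 64-bit VSX target):

     (reg 3)                             INSN_FORM_BASE_REG
     (plus (reg 3) (reg 4))              INSN_FORM_X
     (plus (reg 3) (const_int 8))        INSN_FORM_D, or INSN_FORM_DS if
                                         the caller passes NON_PREFIXED_DS
     (plus (reg 3) (const_int 100000))   INSN_FORM_PREFIXED_NUMERIC when
                                         prefixed addressing is enabled,
                                         INSN_FORM_BAD otherwise  */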
24848
24849 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
24850 instruction format (D/DS/DQ) used for offset memory. */
24851
24852 static enum non_prefixed_form
24853 reg_to_non_prefixed (rtx reg, machine_mode mode)
24854 {
24855 /* If it isn't a register, use the defaults. */
24856 if (!REG_P (reg) && !SUBREG_P (reg))
24857 return NON_PREFIXED_DEFAULT;
24858
24859 unsigned int r = reg_or_subregno (reg);
24860
24861 /* If we have a pseudo, use the default instruction format. */
24862 if (!HARD_REGISTER_NUM_P (r))
24863 return NON_PREFIXED_DEFAULT;
24864
24865 unsigned size = GET_MODE_SIZE (mode);
24866
24867 /* FPR registers use D-form for scalars, and DQ-form for vectors, IEEE
24868 128-bit floating point, and 128-bit integers. */
24869 if (FP_REGNO_P (r))
24870 {
24871 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24872 return NON_PREFIXED_D;
24873
24874 else if (size < 8)
24875 return NON_PREFIXED_X;
24876
24877 else if (TARGET_VSX && size >= 16
24878 && (VECTOR_MODE_P (mode)
24879 || FLOAT128_VECTOR_P (mode)
24880 || mode == TImode || mode == CTImode))
24881 return NON_PREFIXED_DQ;
24882
24883 else
24884 return NON_PREFIXED_DEFAULT;
24885 }
24886
24887 /* Altivec registers use DS-form for scalars, and DQ-form for vectors, IEEE
24888 128-bit floating point, and 128-bit integers. */
24889 else if (ALTIVEC_REGNO_P (r))
24890 {
24891 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24892 return NON_PREFIXED_DS;
24893
24894 else if (size < 8)
24895 return NON_PREFIXED_X;
24896
24897 else if (TARGET_VSX && size >= 16
24898 && (VECTOR_MODE_P (mode)
24899 || FLOAT128_VECTOR_P (mode)
24900 || mode == TImode || mode == CTImode))
24901 return NON_PREFIXED_DQ;
24902
24903 else
24904 return NON_PREFIXED_DEFAULT;
24905 }
24906
24907 /* GPR registers use DS-form for 64-bit items on 64-bit systems, and D-form
24908 otherwise. Assume that any other register, such as LR, CRs, etc. will go
24909 through the GPR registers for memory operations. */
24910 else if (TARGET_POWERPC64 && size >= 8)
24911 return NON_PREFIXED_DS;
24912
24913 return NON_PREFIXED_D;
24914 }
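
/* Examples of the mapping (sketch): DImode in a GPR on a 64-bit system
   is NON_PREFIXED_DS (LD/STD need the low 2 offset bits clear); DImode
   in an FPR is NON_PREFIXED_D (LFD/STFD take a full 16-bit offset);
   DImode in an Altivec register is NON_PREFIXED_DS (LXSD/STXSD); and a
   V2DImode vector in a VSX register is NON_PREFIXED_DQ (LXV/STXV).  */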
24915
24916 \f
24917 /* Whether a load instruction is a prefixed instruction. This is called from
24918 the prefixed attribute processing. */
24919
24920 bool
24921 prefixed_load_p (rtx_insn *insn)
24922 {
24923 /* Validate the insn to make sure it is a normal load insn. */
24924 extract_insn_cached (insn);
24925 if (recog_data.n_operands < 2)
24926 return false;
24927
24928 rtx reg = recog_data.operand[0];
24929 rtx mem = recog_data.operand[1];
24930
24931 if (!REG_P (reg) && !SUBREG_P (reg))
24932 return false;
24933
24934 if (!MEM_P (mem))
24935 return false;
24936
24937 /* Prefixed load instructions do not support update or indexed forms. */
24938 if (get_attr_indexed (insn) == INDEXED_YES
24939 || get_attr_update (insn) == UPDATE_YES)
24940 return false;
24941
24942 /* LWA uses the DS format instead of the D format that LWZ uses. */
24943 enum non_prefixed_form non_prefixed;
24944 machine_mode reg_mode = GET_MODE (reg);
24945 machine_mode mem_mode = GET_MODE (mem);
24946
24947 if (mem_mode == SImode && reg_mode == DImode
24948 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
24949 non_prefixed = NON_PREFIXED_DS;
24950
24951 else
24952 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
24953
24954 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
24955 }
24956
24957 /* Whether a store instruction is a prefixed instruction. This is called from
24958 the prefixed attribute processing. */
24959
24960 bool
24961 prefixed_store_p (rtx_insn *insn)
24962 {
24963 /* Validate the insn to make sure it is a normal store insn. */
24964 extract_insn_cached (insn);
24965 if (recog_data.n_operands < 2)
24966 return false;
24967
24968 rtx mem = recog_data.operand[0];
24969 rtx reg = recog_data.operand[1];
24970
24971 if (!REG_P (reg) && !SUBREG_P (reg))
24972 return false;
24973
24974 if (!MEM_P (mem))
24975 return false;
24976
24977 /* Prefixed store instructions do not support update or indexed forms. */
24978 if (get_attr_indexed (insn) == INDEXED_YES
24979 || get_attr_update (insn) == UPDATE_YES)
24980 return false;
24981
24982 machine_mode mem_mode = GET_MODE (mem);
24983 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
24984 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
24985 }
24986
24987 /* Whether a load immediate or add instruction is a prefixed instruction. This
24988 is called from the prefixed attribute processing. */
24989
24990 bool
24991 prefixed_paddi_p (rtx_insn *insn)
24992 {
24993 rtx set = single_set (insn);
24994 if (!set)
24995 return false;
24996
24997 rtx dest = SET_DEST (set);
24998 rtx src = SET_SRC (set);
24999
25000 if (!REG_P (dest) && !SUBREG_P (dest))
25001 return false;
25002
25003 /* Is this a load immediate that can't be done with a simple ADDI or
25004 ADDIS? */
25005 if (CONST_INT_P (src))
25006 return (satisfies_constraint_eI (src)
25007 && !satisfies_constraint_I (src)
25008 && !satisfies_constraint_L (src));
25009
25010 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25011 ADDIS? */
25012 if (GET_CODE (src) == PLUS)
25013 {
25014 rtx op1 = XEXP (src, 1);
25015
25016 return (CONST_INT_P (op1)
25017 && satisfies_constraint_eI (op1)
25018 && !satisfies_constraint_I (op1)
25019 && !satisfies_constraint_L (op1));
25020 }
25021
25022 /* If not, is it a load of a PC-relative address? */
25023 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25024 return false;
25025
25026 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25027 return false;
25028
25029 enum insn_form iform = address_to_insn_form (src, Pmode,
25030 NON_PREFIXED_DEFAULT);
25031
25032 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25033 }
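
/* Illustrative cases: loading the constant 0x12345678 cannot be done
   with a single ADDI or ADDIS, so it satisfies constraint eI but not I
   or L and is a prefixed PLI/PADDI; adding 16 to a register satisfies I
   and stays a plain ADDI; and with PC-relative addressing, loading the
   address of a symbol uses the prefixed pcrel PLA/PADDI form detected
   above.  */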
25034
25035 /* Whether the next instruction needs a 'p' prefix issued before the
25036 instruction is printed out. */
25037 static bool next_insn_prefixed_p;
25038
25039 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25040 outputting the assembler code. On the PowerPC, we remember if the current
25041 insn is a prefixed insn where we need to emit a 'p' before the insn.
25042
25043 In addition, if the insn is part of a PC-relative reference to an external
25044 label optimization, this is recorded also. */
25045 void
25046 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25047 {
25048 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25049 return;
25050 }
25051
25052 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25053 We use it to emit a 'p' for prefixed insns, based on the flag set in
25054 FINAL_PRESCAN_INSN. */
25055 void
25056 rs6000_asm_output_opcode (FILE *stream)
25057 {
25058 if (next_insn_prefixed_p)
25059 fprintf (stream, "p");
25060
25061 return;
25062 }
25063
25064 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25065 should be adjusted to reflect any required changes. This macro is used when
25066 there is some systematic length adjustment required that would be difficult
25067 to express in the length attribute.
25068
25069 On the PowerPC, we use this to adjust the length of an instruction if one or
25070 more prefixed instructions are generated, using the attribute
25071 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
25072 hardware requires that a prefixed instruction does not cross a 64-byte
25073 boundary. This means the compiler has to assume the length of the first
25074 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
25075 already set for the non-prefixed instruction, we just need to update for the
25076 difference. */
25077
25078 int
25079 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25080 {
25081 if (TARGET_PREFIXED_ADDR && NONJUMP_INSN_P (insn))
25082 {
25083 rtx pattern = PATTERN (insn);
25084 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25085 && get_attr_prefixed (insn) == PREFIXED_YES)
25086 {
25087 int num_prefixed = get_attr_max_prefixed_insns (insn);
25088 length += 4 * (num_prefixed + 1);
25089 }
25090 }
25091
25092 return length;
25093 }
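
/* Worked example: an insn whose non-prefixed length is 4 bytes and
   whose max_prefixed_insns attribute is 1 has 4 * (1 + 1) = 8 bytes
   added, for a total of 12: 8 for the prefixed instruction itself plus
   4 in case the assembler must insert a NOP so that the prefixed
   instruction does not cross a 64-byte boundary.  */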
25094
25095 \f
25096 #ifdef HAVE_GAS_HIDDEN
25097 # define USE_HIDDEN_LINKONCE 1
25098 #else
25099 # define USE_HIDDEN_LINKONCE 0
25100 #endif
25101
25102 /* Fills in the label name that should be used for a 476 link stack thunk. */
25103
25104 void
25105 get_ppc476_thunk_name (char name[32])
25106 {
25107 gcc_assert (TARGET_LINK_STACK);
25108
25109 if (USE_HIDDEN_LINKONCE)
25110 sprintf (name, "__ppc476.get_thunk");
25111 else
25112 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25113 }
25114
25115 /* This function emits the simple thunk routine that is used to preserve
25116 the link stack on the 476 cpu. */
25117
25118 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25119 static void
25120 rs6000_code_end (void)
25121 {
25122 char name[32];
25123 tree decl;
25124
25125 if (!TARGET_LINK_STACK)
25126 return;
25127
25128 get_ppc476_thunk_name (name);
25129
25130 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25131 build_function_type_list (void_type_node, NULL_TREE));
25132 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25133 NULL_TREE, void_type_node);
25134 TREE_PUBLIC (decl) = 1;
25135 TREE_STATIC (decl) = 1;
25136
25137 #if RS6000_WEAK
25138 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
25139 {
25140 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25141 targetm.asm_out.unique_section (decl, 0);
25142 switch_to_section (get_named_section (decl, NULL, 0));
25143 DECL_WEAK (decl) = 1;
25144 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25145 targetm.asm_out.globalize_label (asm_out_file, name);
25146 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25147 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25148 }
25149 else
25150 #endif
25151 {
25152 switch_to_section (text_section);
25153 ASM_OUTPUT_LABEL (asm_out_file, name);
25154 }
25155
25156 DECL_INITIAL (decl) = make_node (BLOCK);
25157 current_function_decl = decl;
25158 allocate_struct_function (decl, false);
25159 init_function_start (decl);
25160 first_function_block_is_cold = false;
25161 /* Make sure unwind info is emitted for the thunk if needed. */
25162 final_start_function (emit_barrier (), asm_out_file, 1);
25163
25164 fputs ("\tblr\n", asm_out_file);
25165
25166 final_end_function ();
25167 init_insn_lengths ();
25168 free_after_compilation (cfun);
25169 set_cfun (NULL);
25170 current_function_decl = NULL;
25171 }
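/* The emitted thunk is minimal; it is just the label followed by a return:

     __ppc476.get_thunk:
	     blr

   placed in its own comdat section when hidden linkonce symbols are
   supported, or in the text section otherwise.  */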
25172
25173 /* Add r30 to hard reg set if the prologue sets it up and it is not
25174 pic_offset_table_rtx. */
25175
25176 static void
25177 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25178 {
25179 if (!TARGET_SINGLE_PIC_BASE
25180 && TARGET_TOC
25181 && TARGET_MINIMAL_TOC
25182 && !constant_pool_empty_p ())
25183 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25184 if (cfun->machine->split_stack_argp_used)
25185 add_to_hard_reg_set (&set->set, Pmode, 12);
25186
25187 /* Make sure the hard reg set doesn't include r2, which was possibly added
25188 via PIC_OFFSET_TABLE_REGNUM. */
25189 if (TARGET_TOC)
25190 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25191 }
25192
25193 \f
25194 /* Helper function for rs6000_split_logical to emit a logical instruction after
25195 splitting the operation into single GPR registers.
25196
25197 DEST is the destination register.
25198 OP1 and OP2 are the input source registers.
25199 CODE is the base operation (AND, IOR, XOR, NOT).
25200 MODE is the machine mode.
25201 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25202 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25203 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25204
25205 static void
25206 rs6000_split_logical_inner (rtx dest,
25207 rtx op1,
25208 rtx op2,
25209 enum rtx_code code,
25210 machine_mode mode,
25211 bool complement_final_p,
25212 bool complement_op1_p,
25213 bool complement_op2_p)
25214 {
25215 rtx bool_rtx;
25216
25217 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25218 if (op2 && CONST_INT_P (op2)
25219 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25220 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25221 {
25222 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25223 HOST_WIDE_INT value = INTVAL (op2) & mask;
25224
25225 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25226 if (code == AND)
25227 {
25228 if (value == 0)
25229 {
25230 emit_insn (gen_rtx_SET (dest, const0_rtx));
25231 return;
25232 }
25233
25234 else if (value == mask)
25235 {
25236 if (!rtx_equal_p (dest, op1))
25237 emit_insn (gen_rtx_SET (dest, op1));
25238 return;
25239 }
25240 }
25241
25242 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25243 into separate ORI/ORIS or XORI/XORIS instructions. */
25244 else if (code == IOR || code == XOR)
25245 {
25246 if (value == 0)
25247 {
25248 if (!rtx_equal_p (dest, op1))
25249 emit_insn (gen_rtx_SET (dest, op1));
25250 return;
25251 }
25252 }
25253 }
25254
25255 if (code == AND && mode == SImode
25256 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25257 {
25258 emit_insn (gen_andsi3 (dest, op1, op2));
25259 return;
25260 }
25261
25262 if (complement_op1_p)
25263 op1 = gen_rtx_NOT (mode, op1);
25264
25265 if (complement_op2_p)
25266 op2 = gen_rtx_NOT (mode, op2);
25267
25268 /* For canonical RTL, if only one arm is inverted it is the first. */
25269 if (!complement_op1_p && complement_op2_p)
25270 std::swap (op1, op2);
25271
25272 bool_rtx = ((code == NOT)
25273 ? gen_rtx_NOT (mode, op1)
25274 : gen_rtx_fmt_ee (code, mode, op1, op2));
25275
25276 if (complement_final_p)
25277 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25278
25279 emit_insn (gen_rtx_SET (dest, bool_rtx));
25280 }
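/* As an illustration of the canonicalization above: an AND where only
   COMPLEMENT_OP2_P is set has its operands swapped after the NOT is
   applied, so the insn emitted is (set dest (and (not op2) op1)), the
   canonical form matched by the andc-style patterns.  */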
25281
25282 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25283 operations are split immediately during RTL generation to allow for more
25284 optimizations of the AND/IOR/XOR.
25285
25286 OPERANDS is an array containing the destination and two input operands.
25287 CODE is the base operation (AND, IOR, XOR, NOT).
25289 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25290 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25291 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25294
25295 static void
25296 rs6000_split_logical_di (rtx operands[3],
25297 enum rtx_code code,
25298 bool complement_final_p,
25299 bool complement_op1_p,
25300 bool complement_op2_p)
25301 {
25302 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25303 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25304 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25305 enum hi_lo { hi = 0, lo = 1 };
25306 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25307 size_t i;
25308
25309 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25310 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25311 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25312 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25313
25314 if (code == NOT)
25315 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25316 else
25317 {
25318 if (!CONST_INT_P (operands[2]))
25319 {
25320 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25321 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25322 }
25323 else
25324 {
25325 HOST_WIDE_INT value = INTVAL (operands[2]);
25326 HOST_WIDE_INT value_hi_lo[2];
25327
25328 gcc_assert (!complement_final_p);
25329 gcc_assert (!complement_op1_p);
25330 gcc_assert (!complement_op2_p);
25331
25332 value_hi_lo[hi] = value >> 32;
25333 value_hi_lo[lo] = value & lower_32bits;
25334
25335 for (i = 0; i < 2; i++)
25336 {
25337 HOST_WIDE_INT sub_value = value_hi_lo[i];
25338
25339 if (sub_value & sign_bit)
25340 sub_value |= upper_32bits;
25341
25342 op2_hi_lo[i] = GEN_INT (sub_value);
25343
25344 /* If this is an AND instruction, check to see if we need to load
25345 the value into a register. */
25346 if (code == AND && sub_value != -1 && sub_value != 0
25347 && !and_operand (op2_hi_lo[i], SImode))
25348 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25349 }
25350 }
25351 }
25352
25353 for (i = 0; i < 2; i++)
25354 {
25355 /* Split large IOR/XOR operations. */
25356 if ((code == IOR || code == XOR)
25357 && CONST_INT_P (op2_hi_lo[i])
25358 && !complement_final_p
25359 && !complement_op1_p
25360 && !complement_op2_p
25361 && !logical_const_operand (op2_hi_lo[i], SImode))
25362 {
25363 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25364 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25365 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25366 rtx tmp = gen_reg_rtx (SImode);
25367
25368 /* Make sure the constant is sign extended. */
25369 if ((hi_16bits & sign_bit) != 0)
25370 hi_16bits |= upper_32bits;
25371
25372 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25373 code, SImode, false, false, false);
25374
25375 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25376 code, SImode, false, false, false);
25377 }
25378 else
25379 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25380 code, SImode, complement_final_p,
25381 complement_op1_p, complement_op2_p);
25382 }
25385 }
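/* As a worked (hypothetical) example, an IOR of the low word with
   0x12345678 fails logical_const_operand, so it is split into an IOR with
   0x12340000 into a temporary (an oris) followed by an IOR of the
   temporary with 0x5678 (an ori).  */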
25386
25387 /* Split the insns that make up boolean operations operating on multiple GPR
25388 registers. The boolean MD patterns ensure that the inputs either are
25389 exactly the same as the output registers, or there is no overlap.
25390
25391 OPERANDS is an array containing the destination and two input operands.
25392 CODE is the base operation (AND, IOR, XOR, NOT).
25393 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25394 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25395 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25396
25397 void
25398 rs6000_split_logical (rtx operands[3],
25399 enum rtx_code code,
25400 bool complement_final_p,
25401 bool complement_op1_p,
25402 bool complement_op2_p)
25403 {
25404 machine_mode mode = GET_MODE (operands[0]);
25405 machine_mode sub_mode;
25406 rtx op0, op1, op2;
25407 int sub_size, regno0, regno1, nregs, i;
25408
25409 /* If this is DImode, use the specialized version that can run before
25410 register allocation. */
25411 if (mode == DImode && !TARGET_POWERPC64)
25412 {
25413 rs6000_split_logical_di (operands, code, complement_final_p,
25414 complement_op1_p, complement_op2_p);
25415 return;
25416 }
25417
25418 op0 = operands[0];
25419 op1 = operands[1];
25420 op2 = (code == NOT) ? NULL_RTX : operands[2];
25421 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25422 sub_size = GET_MODE_SIZE (sub_mode);
25423 regno0 = REGNO (op0);
25424 regno1 = REGNO (op1);
25425
25426 gcc_assert (reload_completed);
25427 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25428 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25429
25430 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25431 gcc_assert (nregs > 1);
25432
25433 if (op2 && REG_P (op2))
25434 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
25435
25436 for (i = 0; i < nregs; i++)
25437 {
25438 int offset = i * sub_size;
25439 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25440 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25441 rtx sub_op2 = ((code == NOT)
25442 ? NULL_RTX
25443 : simplify_subreg (sub_mode, op2, mode, offset));
25444
25445 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25446 complement_final_p, complement_op1_p,
25447 complement_op2_p);
25448 }
25451 }
25452
25453 \f
25454 /* Return true if the peephole2 pass can combine an addis instruction with a
25455    load that uses an offset, so that the two can be fused together on a
25456    power8. */
25457
25458 bool
25459 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
25460 rtx addis_value, /* addis value. */
25461 rtx target, /* target register that is loaded. */
25462 rtx mem) /* bottom part of the memory addr. */
25463 {
25464 rtx addr;
25465 rtx base_reg;
25466
25467 /* Validate arguments. */
25468 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25469 return false;
25470
25471 if (!base_reg_operand (target, GET_MODE (target)))
25472 return false;
25473
25474 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25475 return false;
25476
25477 /* Allow sign/zero extension. */
25478 if (GET_CODE (mem) == ZERO_EXTEND
25479 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25480 mem = XEXP (mem, 0);
25481
25482 if (!MEM_P (mem))
25483 return false;
25484
25485 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25486 return false;
25487
25488 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
25489 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25490 return false;
25491
25492 /* Validate that the register used to load the high value is either the
25493 register being loaded, or we can safely replace its use.
25494
25495 This function is only called from the peephole2 pass and we assume that
25496 there are 2 instructions in the peephole (addis and load), so we check
25497 that the target register is not used in the memory address and that the
25498 register holding the addis result is dead after the peephole.
25499 if (REGNO (addis_reg) != REGNO (target))
25500 {
25501 if (reg_mentioned_p (target, mem))
25502 return false;
25503
25504 if (!peep2_reg_dead_p (2, addis_reg))
25505 return false;
25506
25507 /* If the target register being loaded is the stack pointer, we must
25508 avoid loading any other value into it, even temporarily. */
25509 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25510 return false;
25511 }
25512
25513 base_reg = XEXP (addr, 0);
25514 return REGNO (addis_reg) == REGNO (base_reg);
25515 }
25516
25517 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25518 sequence. We adjust the addis register to use the target register. If the
25519 load sign extends, we rewrite it as a zero-extending load followed by an
25520 explicit sign extension, since the fusion only covers zero-extending
25521 loads.
25522
25523 The operands are:
25524 operands[0] register set with addis (to be replaced with target)
25525 operands[1] value set via addis
25526 operands[2] target register being loaded
25527 operands[3] D-form memory reference using operands[0]. */
25528
25529 void
25530 expand_fusion_gpr_load (rtx *operands)
25531 {
25532 rtx addis_value = operands[1];
25533 rtx target = operands[2];
25534 rtx orig_mem = operands[3];
25535 rtx new_addr, new_mem, orig_addr, offset;
25536 enum rtx_code plus_or_lo_sum;
25537 machine_mode target_mode = GET_MODE (target);
25538 machine_mode extend_mode = target_mode;
25539 machine_mode ptr_mode = Pmode;
25540 enum rtx_code extend = UNKNOWN;
25541
25542 if (GET_CODE (orig_mem) == ZERO_EXTEND
25543 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25544 {
25545 extend = GET_CODE (orig_mem);
25546 orig_mem = XEXP (orig_mem, 0);
25547 target_mode = GET_MODE (orig_mem);
25548 }
25549
25550 gcc_assert (MEM_P (orig_mem));
25551
25552 orig_addr = XEXP (orig_mem, 0);
25553 plus_or_lo_sum = GET_CODE (orig_addr);
25554 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
25555
25556 offset = XEXP (orig_addr, 1);
25557 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25558 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
25559
25560 if (extend != UNKNOWN)
25561 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25562
25563 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25564 UNSPEC_FUSION_GPR);
25565 emit_insn (gen_rtx_SET (target, new_mem));
25566
25567 if (extend == SIGN_EXTEND)
25568 {
25569 int sub_off = ((BYTES_BIG_ENDIAN)
25570 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25571 : 0);
25572 rtx sign_reg
25573 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25574
25575 emit_insn (gen_rtx_SET (target,
25576 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25577 }
25580 }
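/* For instance, a fused sign-extending HImode load is emitted above as the
   zero-extending load wrapped in UNSPEC_FUSION_GPR, followed by a separate
   (sign_extend ...) of the loaded low part, because the power8 fusion only
   covers zero-extending loads.  */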
25581
25582 /* Emit the addis instruction that will be part of a fused instruction
25583 sequence. */
25584
25585 void
25586 emit_fusion_addis (rtx target, rtx addis_value)
25587 {
25588 rtx fuse_ops[10];
25589 const char *addis_str = NULL;
25590
25591 /* Emit the addis instruction. */
25592 fuse_ops[0] = target;
25593 if (satisfies_constraint_L (addis_value))
25594 {
25595 fuse_ops[1] = addis_value;
25596 addis_str = "lis %0,%v1";
25597 }
25598
25599 else if (GET_CODE (addis_value) == PLUS)
25600 {
25601 rtx op0 = XEXP (addis_value, 0);
25602 rtx op1 = XEXP (addis_value, 1);
25603
25604 if (REG_P (op0) && CONST_INT_P (op1)
25605 && satisfies_constraint_L (op1))
25606 {
25607 fuse_ops[1] = op0;
25608 fuse_ops[2] = op1;
25609 addis_str = "addis %0,%1,%v2";
25610 }
25611 }
25612
25613 else if (GET_CODE (addis_value) == HIGH)
25614 {
25615 rtx value = XEXP (addis_value, 0);
25616 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25617 {
25618 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
25619 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
25620 if (TARGET_ELF)
25621 addis_str = "addis %0,%2,%1@toc@ha";
25622
25623 else if (TARGET_XCOFF)
25624 addis_str = "addis %0,%1@u(%2)";
25625
25626 else
25627 gcc_unreachable ();
25628 }
25629
25630 else if (GET_CODE (value) == PLUS)
25631 {
25632 rtx op0 = XEXP (value, 0);
25633 rtx op1 = XEXP (value, 1);
25634
25635 if (GET_CODE (op0) == UNSPEC
25636 && XINT (op0, 1) == UNSPEC_TOCREL
25637 && CONST_INT_P (op1))
25638 {
25639 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
25640 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
25641 fuse_ops[3] = op1;
25642 if (TARGET_ELF)
25643 addis_str = "addis %0,%2,%1+%3@toc@ha";
25644
25645 else if (TARGET_XCOFF)
25646 addis_str = "addis %0,%1+%3@u(%2)";
25647
25648 else
25649 gcc_unreachable ();
25650 }
25651 }
25652
25653 else if (satisfies_constraint_L (value))
25654 {
25655 fuse_ops[1] = value;
25656 addis_str = "lis %0,%v1";
25657 }
25658
25659 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25660 {
25661 fuse_ops[1] = value;
25662 addis_str = "lis %0,%1@ha";
25663 }
25664 }
25665
25666 if (!addis_str)
25667 fatal_insn ("Could not generate addis value for fusion", addis_value);
25668
25669 output_asm_insn (addis_str, fuse_ops);
25670 }
25671
25672 /* Emit a D-form load or store instruction that is the second instruction
25673 of a fusion sequence. */
25674
25675 static void
25676 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25677 {
25678 rtx fuse_ops[10];
25679 char insn_template[80];
25680
25681 fuse_ops[0] = load_reg;
25682 fuse_ops[1] = addis_reg;
25683
25684 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25685 {
25686 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25687 fuse_ops[2] = offset;
25688 output_asm_insn (insn_template, fuse_ops);
25689 }
25690
25691 else if (GET_CODE (offset) == UNSPEC
25692 && XINT (offset, 1) == UNSPEC_TOCREL)
25693 {
25694 if (TARGET_ELF)
25695 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25696
25697 else if (TARGET_XCOFF)
25698 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25699
25700 else
25701 gcc_unreachable ();
25702
25703 fuse_ops[2] = XVECEXP (offset, 0, 0);
25704 output_asm_insn (insn_template, fuse_ops);
25705 }
25706
25707 else if (GET_CODE (offset) == PLUS
25708 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25709 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25710 && CONST_INT_P (XEXP (offset, 1)))
25711 {
25712 rtx tocrel_unspec = XEXP (offset, 0);
25713 if (TARGET_ELF)
25714 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25715
25716 else if (TARGET_XCOFF)
25717 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25718
25719 else
25720 gcc_unreachable ();
25721
25722 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25723 fuse_ops[3] = XEXP (offset, 1);
25724 output_asm_insn (insn_template, fuse_ops);
25725 }
25726
25727 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25728 {
25729 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25730
25731 fuse_ops[2] = offset;
25732 output_asm_insn (insn_template, fuse_ops);
25733 }
25734
25735 else
25736 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25739 }
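/* For example, with insn_str "lwz" and a CONST_INT offset, the template
   becomes "lwz %0,%2(%1)", which output_asm_insn prints as something like
   "lwz 9,8(10)" (hypothetical register numbers).  */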
25740
25741 /* Given an address, convert it into the addis and load offset parts. Addresses
25742 created during the peephole2 process look like:
25743 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25744 (unspec [(...)] UNSPEC_TOCREL)) */
25745
25746 static void
25747 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25748 {
25749 rtx hi, lo;
25750
25751 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25752 {
25753 hi = XEXP (addr, 0);
25754 lo = XEXP (addr, 1);
25755 }
25756 else
25757 gcc_unreachable ();
25758
25759 *p_hi = hi;
25760 *p_lo = lo;
25761 }
25762
25763 /* Return a string to fuse an addis instruction with a GPR load into the same
25764 register that the addis instruction set up. The address that is used
25765 is the logical address that was formed during peephole2:
25766 (lo_sum (high) (low-part))
25767
25768 The code is complicated, so we call output_asm_insn directly, and just
25769 return "". */
25770
25771 const char *
25772 emit_fusion_gpr_load (rtx target, rtx mem)
25773 {
25774 rtx addis_value;
25775 rtx addr;
25776 rtx load_offset;
25777 const char *load_str = NULL;
25778 machine_mode mode;
25779
25780 if (GET_CODE (mem) == ZERO_EXTEND)
25781 mem = XEXP (mem, 0);
25782
25783 gcc_assert (REG_P (target) && MEM_P (mem));
25784
25785 addr = XEXP (mem, 0);
25786 fusion_split_address (addr, &addis_value, &load_offset);
25787
25788 /* Now emit the load instruction to the same register. */
25789 mode = GET_MODE (mem);
25790 switch (mode)
25791 {
25792 case E_QImode:
25793 load_str = "lbz";
25794 break;
25795
25796 case E_HImode:
25797 load_str = "lhz";
25798 break;
25799
25800 case E_SImode:
25801 case E_SFmode:
25802 load_str = "lwz";
25803 break;
25804
25805 case E_DImode:
25806 case E_DFmode:
25807 gcc_assert (TARGET_POWERPC64);
25808 load_str = "ld";
25809 break;
25810
25811 default:
25812 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
25813 }
25814
25815 /* Emit the addis instruction. */
25816 emit_fusion_addis (target, addis_value);
25817
25818 /* Emit the D-form load instruction. */
25819 emit_fusion_load (target, target, load_offset, load_str);
25820
25821 return "";
25822 }
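/* For example, on an ELF target a fused TOC-relative SImode load into r9
   prints as:

     addis 9,2,sym@toc@ha
     lwz 9,sym@toc@l(9)

   where the load reuses the register that the addis set, which is the
   pattern a power8 can fuse.  */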
25823 \f
25824
25825 #ifdef RS6000_GLIBC_ATOMIC_FENV
25826 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
25827 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
25828 #endif
25829
25830 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
25831
25832 static void
25833 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25834 {
25835 if (!TARGET_HARD_FLOAT)
25836 {
25837 #ifdef RS6000_GLIBC_ATOMIC_FENV
25838 if (atomic_hold_decl == NULL_TREE)
25839 {
25840 atomic_hold_decl
25841 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25842 get_identifier ("__atomic_feholdexcept"),
25843 build_function_type_list (void_type_node,
25844 double_ptr_type_node,
25845 NULL_TREE));
25846 TREE_PUBLIC (atomic_hold_decl) = 1;
25847 DECL_EXTERNAL (atomic_hold_decl) = 1;
25848 }
25849
25850 if (atomic_clear_decl == NULL_TREE)
25851 {
25852 atomic_clear_decl
25853 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25854 get_identifier ("__atomic_feclearexcept"),
25855 build_function_type_list (void_type_node,
25856 NULL_TREE));
25857 TREE_PUBLIC (atomic_clear_decl) = 1;
25858 DECL_EXTERNAL (atomic_clear_decl) = 1;
25859 }
25860
25861 tree const_double = build_qualified_type (double_type_node,
25862 TYPE_QUAL_CONST);
25863 tree const_double_ptr = build_pointer_type (const_double);
25864 if (atomic_update_decl == NULL_TREE)
25865 {
25866 atomic_update_decl
25867 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25868 get_identifier ("__atomic_feupdateenv"),
25869 build_function_type_list (void_type_node,
25870 const_double_ptr,
25871 NULL_TREE));
25872 TREE_PUBLIC (atomic_update_decl) = 1;
25873 DECL_EXTERNAL (atomic_update_decl) = 1;
25874 }
25875
25876 tree fenv_var = create_tmp_var_raw (double_type_node);
25877 TREE_ADDRESSABLE (fenv_var) = 1;
25878 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
25879
25880 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
25881 *clear = build_call_expr (atomic_clear_decl, 0);
25882 *update = build_call_expr (atomic_update_decl, 1,
25883 fold_convert (const_double_ptr, fenv_addr));
25884 #endif
25885 return;
25886 }
25887
25888 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
25889 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
25890 tree call_mffs = build_call_expr (mffs, 0);
25891
25892 /* Generates the equivalent of feholdexcept (&fenv_var)
25893
25894 fenv_var = __builtin_mffs ();
25895 double fenv_hold;
25896 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
25897 __builtin_mtfsf (0xff, fenv_hold); */
25898
25899 /* Mask to clear everything except for the rounding modes and non-IEEE
25900 arithmetic flag. */
25901 const unsigned HOST_WIDE_INT hold_exception_mask =
25902 HOST_WIDE_INT_C (0xffffffff00000007);
25903
25904 tree fenv_var = create_tmp_var_raw (double_type_node);
25905
25906 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
25907
25908 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
25909 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
25910 build_int_cst (uint64_type_node,
25911 hold_exception_mask));
25912
25913 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25914 fenv_llu_and);
25915
25916 tree hold_mtfsf = build_call_expr (mtfsf, 2,
25917 build_int_cst (unsigned_type_node, 0xff),
25918 fenv_hold_mtfsf);
25919
25920 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
25921
25922 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
25923
25924 double fenv_clear = __builtin_mffs ();
25925 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
25926 __builtin_mtfsf (0xff, fenv_clear); */
25927
25928 /* Mask to clear the entire lower word of the mffs result (the FPSCR image),
25929 including the exception bits, rounding modes, and non-IEEE arithmetic
25930 flag. */
25930 const unsigned HOST_WIDE_INT clear_exception_mask =
25931 HOST_WIDE_INT_C (0xffffffff00000000);
25932
25933 tree fenv_clear = create_tmp_var_raw (double_type_node);
25934
25935 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
25936
25937 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
25938 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
25939 fenv_clean_llu,
25940 build_int_cst (uint64_type_node,
25941 clear_exception_mask));
25942
25943 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25944 fenv_clear_llu_and);
25945
25946 tree clear_mtfsf = build_call_expr (mtfsf, 2,
25947 build_int_cst (unsigned_type_node, 0xff),
25948 fenv_clear_mtfsf);
25949
25950 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
25951
25952 /* Generates the equivalent of feupdateenv (&fenv_var)
25953
25954 double old_fenv = __builtin_mffs ();
25955 double fenv_update;
25956 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
25957 (*(uint64_t*)&fenv_var & 0x1ff80fff);
25958 __builtin_mtfsf (0xff, fenv_update); */
25959
25960 const unsigned HOST_WIDE_INT update_exception_mask =
25961 HOST_WIDE_INT_C (0xffffffff1fffff00);
25962 const unsigned HOST_WIDE_INT new_exception_mask =
25963 HOST_WIDE_INT_C (0x1ff80fff);
25964
25965 tree old_fenv = create_tmp_var_raw (double_type_node);
25966 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
25967
25968 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
25969 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
25970 build_int_cst (uint64_type_node,
25971 update_exception_mask));
25972
25973 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
25974 build_int_cst (uint64_type_node,
25975 new_exception_mask));
25976
25977 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
25978 old_llu_and, new_llu_and);
25979
25980 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25981 new_llu_mask);
25982
25983 tree update_mtfsf = build_call_expr (mtfsf, 2,
25984 build_int_cst (unsigned_type_node, 0xff),
25985 fenv_update_mtfsf);
25986
25987 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
25988 }
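/* The three sequences built above are used by the generic atomic expansion
   code roughly as follows (a sketch, not literal output):

     hold;	before the atomic operation, save the FP environment
     ...	compare-and-swap loop body
     clear;	on a failed iteration, discard any raised exceptions
     update;	on success, restore the environment and raise new exceptions  */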
25989
25990 void
25991 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
25992 {
25993 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
25994
25995 rtx_tmp0 = gen_reg_rtx (V2DFmode);
25996 rtx_tmp1 = gen_reg_rtx (V2DFmode);
25997
25998 /* The destination of the vmrgew instruction layout is:
25999 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26000 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26001 vmrgew instruction will be correct. */
26002 if (BYTES_BIG_ENDIAN)
26003 {
26004 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26005 GEN_INT (0)));
26006 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26007 GEN_INT (3)));
26008 }
26009 else
26010 {
26011 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26012 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26013 }
26014
26015 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26016 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26017
26018 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26019 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26020
26021 if (BYTES_BIG_ENDIAN)
26022 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26023 else
26024 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26025 }
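/* A sketch of the net effect, assuming src1 = { a0, a1 } and
   src2 = { b0, b1 } as V2DF values: after the xxpermdi/xvcvdpsp pairs and
   the final vmrgew, dst holds the V4SF value
   { (float) a0, (float) a1, (float) b0, (float) b1 }.  */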
26026
26027 void
26028 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26029 {
26030 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26031
26032 rtx_tmp0 = gen_reg_rtx (V2DImode);
26033 rtx_tmp1 = gen_reg_rtx (V2DImode);
26034
26035 /* The destination of the vmrgew instruction layout is:
26036 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26037 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26038 vmrgew instruction will be correct. */
26039 if (BYTES_BIG_ENDIAN)
26040 {
26041 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26042 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26043 }
26044 else
26045 {
26046 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26047 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26048 }
26049
26050 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26051 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26052
26053 if (signed_convert)
26054 {
26055 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26056 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26057 }
26058 else
26059 {
26060 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26061 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26062 }
26063
26064 if (BYTES_BIG_ENDIAN)
26065 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26066 else
26067 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26068 }
26069
26070 void
26071 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26072 rtx src2)
26073 {
26074 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26075
26076 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26077 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26078
26079 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26080 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26081
26082 rtx_tmp2 = gen_reg_rtx (V4SImode);
26083 rtx_tmp3 = gen_reg_rtx (V4SImode);
26084
26085 if (signed_convert)
26086 {
26087 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26088 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26089 }
26090 else
26091 {
26092 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26093 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26094 }
26095
26096 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
26097 }
26098
26099 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26100
26101 static bool
26102 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26103 optimization_type opt_type)
26104 {
26105 switch (op)
26106 {
26107 case rsqrt_optab:
26108 return (opt_type == OPTIMIZE_FOR_SPEED
26109 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26110
26111 default:
26112 return true;
26113 }
26114 }
26115
26116 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26117
26118 static HOST_WIDE_INT
26119 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26120 {
26121 if (TREE_CODE (exp) == STRING_CST
26122 && (STRICT_ALIGNMENT || !optimize_size))
26123 return MAX (align, BITS_PER_WORD);
26124 return align;
26125 }
26126
26127 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26128
26129 static HOST_WIDE_INT
26130 rs6000_starting_frame_offset (void)
26131 {
26132 if (FRAME_GROWS_DOWNWARD)
26133 return 0;
26134 return RS6000_STARTING_FRAME_OFFSET;
26135 }
26136 \f
26137
26138 /* Create an alias for a mangled name where we have changed the mangling (in
26139 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
26140 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
26141
26142 #if TARGET_ELF && RS6000_WEAK
26143 static void
26144 rs6000_globalize_decl_name (FILE * stream, tree decl)
26145 {
26146 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
26147
26148 targetm.asm_out.globalize_label (stream, name);
26149
26150 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
26151 {
26152 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
26153 const char *old_name;
26154
26155 ieee128_mangling_gcc_8_1 = true;
26156 lang_hooks.set_decl_assembler_name (decl);
26157 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
26158 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
26159 ieee128_mangling_gcc_8_1 = false;
26160
26161 if (strcmp (name, old_name) != 0)
26162 {
26163 fprintf (stream, "\t.weak %s\n", old_name);
26164 fprintf (stream, "\t.set %s,%s\n", old_name, name);
26165 }
26166 }
26167 }
26168 #endif
26169
26170 \f
26171 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
26172 function names from <foo>l to <foo>f128 if the default long double type is
26173 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26174 include file switches the names on systems that support long double as IEEE
26175 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26176 In the future, glibc will export names like __ieee128_sinf128 and we can
26177 switch to using those instead of using sinf128, which pollutes the user's
26178 namespace.
26179
26180 This will switch the names for Fortran math functions as well (which doesn't
26181 use math.h). However, Fortran needs other changes to the compiler and
26182 library before you can switch the real*16 type at compile time.
26183
26184 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26185 only do this if the default is that long double is IBM extended double, and
26186 the user asked for IEEE 128-bit. */
26187
26188 static tree
26189 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26190 {
26191 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26192 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
26193 {
26194 size_t len = IDENTIFIER_LENGTH (id);
26195 const char *name = IDENTIFIER_POINTER (id);
26196
26197 if (name[len - 1] == 'l')
26198 {
26199 bool uses_ieee128_p = false;
26200 tree type = TREE_TYPE (decl);
26201 machine_mode ret_mode = TYPE_MODE (type);
26202
26203 /* See if the function returns an IEEE 128-bit floating point type or
26204 complex type. */
26205 if (ret_mode == TFmode || ret_mode == TCmode)
26206 uses_ieee128_p = true;
26207 else
26208 {
26209 function_args_iterator args_iter;
26210 tree arg;
26211
26212 /* See if the function passes an IEEE 128-bit floating point type
26213 or complex type. */
26214 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26215 {
26216 machine_mode arg_mode = TYPE_MODE (arg);
26217 if (arg_mode == TFmode || arg_mode == TCmode)
26218 {
26219 uses_ieee128_p = true;
26220 break;
26221 }
26222 }
26223 }
26224
26225 /* If we passed or returned an IEEE 128-bit floating point type,
26226 change the name. */
26227 if (uses_ieee128_p)
26228 {
26229 char *name2 = (char *) alloca (len + 4);
26230 memcpy (name2, name, len - 1);
26231 strcpy (name2 + len - 1, "f128");
26232 id = get_identifier (name2);
26233 }
26234 }
26235 }
26236
26237 return id;
26238 }
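/* For example, if the target's default long double is IBM extended double
   but the user compiles with -mabi=ieeelongdouble, a call to __builtin_sinl
   ends up with the assembler name sinf128 instead of sinl.  */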
26239
26240 /* Predict whether the given loop in gimple will be transformed in the RTL
26241 doloop_optimize pass. */
26242
26243 static bool
26244 rs6000_predict_doloop_p (struct loop *loop)
26245 {
26246 gcc_assert (loop);
26247
26248 /* On rs6000, targetm.can_use_doloop_p is actually
26249 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26250 if (loop->inner != NULL)
26251 {
26252 if (dump_file && (dump_flags & TDF_DETAILS))
26253 fprintf (dump_file, "Predict doloop failure due to"
26254 " loop nesting.\n");
26255 return false;
26256 }
26257
26258 return true;
26259 }
26260
26261 struct gcc_target targetm = TARGET_INITIALIZER;
26262
26263 #include "gt-rs6000.h"