1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2020 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
25 #include "coretypes.h"
35 #include "stringpool.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
46 #include "fold-const.h"
48 #include "stor-layout.h"
50 #include "print-tree.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
58 #include "sched-int.h"
60 #include "gimple-fold.h"
61 #include "gimple-iterator.h"
62 #include "gimple-ssa.h"
63 #include "gimple-walk.h"
65 #include "tm-constrs.h"
66 #include "tree-vectorizer.h"
67 #include "target-globals.h"
69 #include "tree-vector-builder.h"
71 #include "tree-pass.h"
74 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
76 #include "case-cfn-macros.h"
78 #include "tree-ssa-propagate.h"
80 #include "tree-ssanames.h"
81 #include "rs6000-internal.h"
84 /* This file should be included last. */
85 #include "target-def.h"
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
101 /* Support targetm.vectorize.builtin_mask_for_load. */
102 GTY(()) tree altivec_builtin_mask_for_load
;
105 /* Counter for labels which are to be placed in .fixup. */
106 int fixuplabelno
= 0;
109 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
112 /* Specify the machine mode that pointers have. After generation of rtl, the
113 compiler makes no further distinction between pointers and any other objects
114 of this machine mode. */
115 scalar_int_mode rs6000_pmode
;
118 /* Note whether IEEE 128-bit floating point was passed or returned, either as
119 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
120 floating point. We changed the default C++ mangling for these types and we
121 may want to generate a weak alias of the old mangling (U10__float128) to the
122 new mangling (u9__ieee128). */
123 bool rs6000_passes_ieee128
= false;
126 /* Generate the manged name (i.e. U10__float128) used in GCC 8.1, and not the
127 name used in current releases (i.e. u9__ieee128). */
128 static bool ieee128_mangling_gcc_8_1
;
130 /* Width in bits of a pointer. */
131 unsigned rs6000_pointer_size
;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
150 /* Value is TRUE if register/mode pair is acceptable. */
151 static bool rs6000_hard_regno_mode_ok_p
152 [NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
154 /* Maximum number of registers needed for a given register class and mode. */
155 unsigned char rs6000_class_max_nregs
[NUM_MACHINE_MODES
][LIM_REG_CLASSES
];
157 /* How many registers are needed for a given register and mode. */
158 unsigned char rs6000_hard_regno_nregs
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
160 /* Map register number to register class. */
161 enum reg_class rs6000_regno_regclass
[FIRST_PSEUDO_REGISTER
];
163 static int dbg_cost_ctrl
;
165 /* Built in types. */
166 tree rs6000_builtin_types
[RS6000_BTI_MAX
];
167 tree rs6000_builtin_decls
[RS6000_BUILTIN_COUNT
];
169 /* Flag to say the TOC is initialized */
170 int toc_initialized
, need_toc_init
;
171 char toc_label_name
[10];
173 /* Cached value of rs6000_variable_issue. This is cached in
174 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
175 static short cached_can_issue_more
;
177 static GTY(()) section
*read_only_data_section
;
178 static GTY(()) section
*private_data_section
;
179 static GTY(()) section
*tls_data_section
;
180 static GTY(()) section
*tls_private_data_section
;
181 static GTY(()) section
*read_only_private_data_section
;
182 static GTY(()) section
*sdata2_section
;
184 extern GTY(()) section
*toc_section
;
185 section
*toc_section
= 0;
187 /* Describe the vector unit used for modes. */
188 enum rs6000_vector rs6000_vector_unit
[NUM_MACHINE_MODES
];
189 enum rs6000_vector rs6000_vector_mem
[NUM_MACHINE_MODES
];
191 /* Register classes for various constraints that are based on the target
193 enum reg_class rs6000_constraints
[RS6000_CONSTRAINT_MAX
];
195 /* Describe the alignment of a vector. */
196 int rs6000_vector_align
[NUM_MACHINE_MODES
];
198 /* Map selected modes to types for builtins. */
199 GTY(()) tree builtin_mode_to_type
[MAX_MACHINE_MODE
][2];
201 /* What modes to automatically generate reciprocal divide estimate (fre) and
202 reciprocal sqrt (frsqrte) for. */
203 unsigned char rs6000_recip_bits
[MAX_MACHINE_MODE
];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
231 /* -mrecip options. */
234 const char *string
; /* option name */
235 unsigned int mask
; /* mask bits to set */
236 } recip_options
[] = {
237 { "all", RECIP_ALL
},
238 { "none", RECIP_NONE
},
239 { "div", (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
241 { "divf", (RECIP_SF_DIV
| RECIP_V4SF_DIV
) },
242 { "divd", (RECIP_DF_DIV
| RECIP_V2DF_DIV
) },
243 { "rsqrt", (RECIP_SF_RSQRT
| RECIP_DF_RSQRT
| RECIP_V4SF_RSQRT
244 | RECIP_V2DF_RSQRT
) },
245 { "rsqrtf", (RECIP_SF_RSQRT
| RECIP_V4SF_RSQRT
) },
246 { "rsqrtd", (RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
) },
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.00 (power9).  */
  CLONE_MAX
};
262 /* Map compiler ISA bits into HWCAP names. */
264 HOST_WIDE_INT isa_mask
; /* rs6000_isa mask */
265 const char *name
; /* name to use in __builtin_cpu_supports. */
268 static const struct clone_map rs6000_clone_map
[CLONE_MAX
] = {
269 { 0, "" }, /* Default options. */
270 { OPTION_MASK_CMPB
, "arch_2_05" }, /* ISA 2.05 (power6). */
271 { OPTION_MASK_POPCNTD
, "arch_2_06" }, /* ISA 2.06 (power7). */
272 { OPTION_MASK_P8_VECTOR
, "arch_2_07" }, /* ISA 2.07 (power8). */
273 { OPTION_MASK_P9_VECTOR
, "arch_3_00" }, /* ISA 3.00 (power9). */
277 /* Newer LIBCs explicitly export this symbol to declare that they provide
278 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
279 reference to this symbol whenever we expand a CPU builtin, so that
280 we never link against an old LIBC. */
281 const char *tcb_verification_symbol
= "__parse_hwcap_and_convert_at_platform";
283 /* True if we have expanded a CPU builtin. */
284 bool cpu_builtin_p
= false;
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};
307 /* Map register class to register type. */
308 static enum rs6000_reg_type reg_class_to_reg_type
[N_REG_CLASSES
];
310 /* First/last register type for the 'normal' register types (i.e. general
311 purpose, floating point, altivec, and VSX registers). */
312 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
314 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, and Altivec registers here,
   along an ANY field that is the OR of the 3 register classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,			/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   3 classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
335 /* Map reload register type to a register in the register class. */
336 struct reload_reg_map_type
{
337 const char *name
; /* Register class name. */
338 int reg
; /* Register in the register class. */
341 static const struct reload_reg_map_type reload_reg_map
[N_RELOAD_REG
] = {
342 { "Gpr", FIRST_GPR_REGNO
}, /* RELOAD_REG_GPR. */
343 { "Fpr", FIRST_FPR_REGNO
}, /* RELOAD_REG_FPR. */
344 { "VMX", FIRST_ALTIVEC_REGNO
}, /* RELOAD_REG_VMX. */
345 { "Any", -1 }, /* RELOAD_REG_ANY. */
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive which types can do PRE_MODIFY instead of
   PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
362 /* Register type masks based on the type, of valid addressing modes. */
363 struct rs6000_reg_addr
{
364 enum insn_code reload_load
; /* INSN to reload for loading. */
365 enum insn_code reload_store
; /* INSN to reload for storing. */
366 enum insn_code reload_fpr_gpr
; /* INSN to move from FPR to GPR. */
367 enum insn_code reload_gpr_vsx
; /* INSN to move from GPR to VSX. */
368 enum insn_code reload_vsx_gpr
; /* INSN to move from VSX to GPR. */
369 addr_mask_type addr_mask
[(int)N_RELOAD_REG
]; /* Valid address masks. */
370 bool scalar_in_vmx_p
; /* Scalar value can go in VMX. */
373 static struct rs6000_reg_addr reg_addr
[NUM_MACHINE_MODES
];
375 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
377 mode_supports_pre_incdec_p (machine_mode mode
)
379 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_INCDEC
)
383 /* Helper function to say whether a mode supports PRE_MODIFY. */
385 mode_supports_pre_modify_p (machine_mode mode
)
387 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_MODIFY
)
391 /* Return true if we have D-form addressing in altivec registers. */
393 mode_supports_vmx_dform (machine_mode mode
)
395 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_OFFSET
) != 0);
398 /* Return true if we have D-form addressing in VSX registers. This addressing
399 is more limited than normal d-form addressing in that the offset must be
400 aligned on a 16-byte boundary. */
402 mode_supports_dq_form (machine_mode mode
)
404 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_QUAD_OFFSET
)
408 /* Given that there exists at least one variable that is set (produced)
409 by OUT_INSN and read (consumed) by IN_INSN, return true iff
410 IN_INSN represents one or more memory store operations and none of
411 the variables set by OUT_INSN is used by IN_INSN as the address of a
412 store operation. If either IN_INSN or OUT_INSN does not represent
413 a "single" RTL SET expression (as loosely defined by the
414 implementation of the single_set function) or a PARALLEL with only
415 SETs, CLOBBERs, and USEs inside, this function returns false.
417 This rs6000-specific version of store_data_bypass_p checks for
418 certain conditions that result in assertion failures (and internal
419 compiler errors) in the generic store_data_bypass_p function and
420 returns false rather than calling store_data_bypass_p if one of the
421 problematic conditions is detected. */
424 rs6000_store_data_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
431 in_set
= single_set (in_insn
);
434 if (MEM_P (SET_DEST (in_set
)))
436 out_set
= single_set (out_insn
);
439 out_pat
= PATTERN (out_insn
);
440 if (GET_CODE (out_pat
) == PARALLEL
)
442 for (i
= 0; i
< XVECLEN (out_pat
, 0); i
++)
444 out_exp
= XVECEXP (out_pat
, 0, i
);
445 if ((GET_CODE (out_exp
) == CLOBBER
)
446 || (GET_CODE (out_exp
) == USE
))
448 else if (GET_CODE (out_exp
) != SET
)
457 in_pat
= PATTERN (in_insn
);
458 if (GET_CODE (in_pat
) != PARALLEL
)
461 for (i
= 0; i
< XVECLEN (in_pat
, 0); i
++)
463 in_exp
= XVECEXP (in_pat
, 0, i
);
464 if ((GET_CODE (in_exp
) == CLOBBER
) || (GET_CODE (in_exp
) == USE
))
466 else if (GET_CODE (in_exp
) != SET
)
469 if (MEM_P (SET_DEST (in_exp
)))
471 out_set
= single_set (out_insn
);
474 out_pat
= PATTERN (out_insn
);
475 if (GET_CODE (out_pat
) != PARALLEL
)
477 for (j
= 0; j
< XVECLEN (out_pat
, 0); j
++)
479 out_exp
= XVECEXP (out_pat
, 0, j
);
480 if ((GET_CODE (out_exp
) == CLOBBER
)
481 || (GET_CODE (out_exp
) == USE
))
483 else if (GET_CODE (out_exp
) != SET
)
490 return store_data_bypass_p (out_insn
, in_insn
);
/* Processor costs (relative to an add) */
static const
struct processor_costs *rs6000_cost;
498 /* Instruction size costs on 32bit processors. */
500 struct processor_costs size32_cost
= {
501 COSTS_N_INSNS (1), /* mulsi */
502 COSTS_N_INSNS (1), /* mulsi_const */
503 COSTS_N_INSNS (1), /* mulsi_const9 */
504 COSTS_N_INSNS (1), /* muldi */
505 COSTS_N_INSNS (1), /* divsi */
506 COSTS_N_INSNS (1), /* divdi */
507 COSTS_N_INSNS (1), /* fp */
508 COSTS_N_INSNS (1), /* dmul */
509 COSTS_N_INSNS (1), /* sdiv */
510 COSTS_N_INSNS (1), /* ddiv */
511 32, /* cache line size */
515 0, /* SF->DF convert */
518 /* Instruction size costs on 64bit processors. */
520 struct processor_costs size64_cost
= {
521 COSTS_N_INSNS (1), /* mulsi */
522 COSTS_N_INSNS (1), /* mulsi_const */
523 COSTS_N_INSNS (1), /* mulsi_const9 */
524 COSTS_N_INSNS (1), /* muldi */
525 COSTS_N_INSNS (1), /* divsi */
526 COSTS_N_INSNS (1), /* divdi */
527 COSTS_N_INSNS (1), /* fp */
528 COSTS_N_INSNS (1), /* dmul */
529 COSTS_N_INSNS (1), /* sdiv */
530 COSTS_N_INSNS (1), /* ddiv */
531 128, /* cache line size */
535 0, /* SF->DF convert */
538 /* Instruction costs on RS64A processors. */
540 struct processor_costs rs64a_cost
= {
541 COSTS_N_INSNS (20), /* mulsi */
542 COSTS_N_INSNS (12), /* mulsi_const */
543 COSTS_N_INSNS (8), /* mulsi_const9 */
544 COSTS_N_INSNS (34), /* muldi */
545 COSTS_N_INSNS (65), /* divsi */
546 COSTS_N_INSNS (67), /* divdi */
547 COSTS_N_INSNS (4), /* fp */
548 COSTS_N_INSNS (4), /* dmul */
549 COSTS_N_INSNS (31), /* sdiv */
550 COSTS_N_INSNS (31), /* ddiv */
551 128, /* cache line size */
555 0, /* SF->DF convert */
558 /* Instruction costs on MPCCORE processors. */
560 struct processor_costs mpccore_cost
= {
561 COSTS_N_INSNS (2), /* mulsi */
562 COSTS_N_INSNS (2), /* mulsi_const */
563 COSTS_N_INSNS (2), /* mulsi_const9 */
564 COSTS_N_INSNS (2), /* muldi */
565 COSTS_N_INSNS (6), /* divsi */
566 COSTS_N_INSNS (6), /* divdi */
567 COSTS_N_INSNS (4), /* fp */
568 COSTS_N_INSNS (5), /* dmul */
569 COSTS_N_INSNS (10), /* sdiv */
570 COSTS_N_INSNS (17), /* ddiv */
571 32, /* cache line size */
575 0, /* SF->DF convert */
578 /* Instruction costs on PPC403 processors. */
580 struct processor_costs ppc403_cost
= {
581 COSTS_N_INSNS (4), /* mulsi */
582 COSTS_N_INSNS (4), /* mulsi_const */
583 COSTS_N_INSNS (4), /* mulsi_const9 */
584 COSTS_N_INSNS (4), /* muldi */
585 COSTS_N_INSNS (33), /* divsi */
586 COSTS_N_INSNS (33), /* divdi */
587 COSTS_N_INSNS (11), /* fp */
588 COSTS_N_INSNS (11), /* dmul */
589 COSTS_N_INSNS (11), /* sdiv */
590 COSTS_N_INSNS (11), /* ddiv */
591 32, /* cache line size */
595 0, /* SF->DF convert */
598 /* Instruction costs on PPC405 processors. */
600 struct processor_costs ppc405_cost
= {
601 COSTS_N_INSNS (5), /* mulsi */
602 COSTS_N_INSNS (4), /* mulsi_const */
603 COSTS_N_INSNS (3), /* mulsi_const9 */
604 COSTS_N_INSNS (5), /* muldi */
605 COSTS_N_INSNS (35), /* divsi */
606 COSTS_N_INSNS (35), /* divdi */
607 COSTS_N_INSNS (11), /* fp */
608 COSTS_N_INSNS (11), /* dmul */
609 COSTS_N_INSNS (11), /* sdiv */
610 COSTS_N_INSNS (11), /* ddiv */
611 32, /* cache line size */
615 0, /* SF->DF convert */
618 /* Instruction costs on PPC440 processors. */
620 struct processor_costs ppc440_cost
= {
621 COSTS_N_INSNS (3), /* mulsi */
622 COSTS_N_INSNS (2), /* mulsi_const */
623 COSTS_N_INSNS (2), /* mulsi_const9 */
624 COSTS_N_INSNS (3), /* muldi */
625 COSTS_N_INSNS (34), /* divsi */
626 COSTS_N_INSNS (34), /* divdi */
627 COSTS_N_INSNS (5), /* fp */
628 COSTS_N_INSNS (5), /* dmul */
629 COSTS_N_INSNS (19), /* sdiv */
630 COSTS_N_INSNS (33), /* ddiv */
631 32, /* cache line size */
635 0, /* SF->DF convert */
638 /* Instruction costs on PPC476 processors. */
640 struct processor_costs ppc476_cost
= {
641 COSTS_N_INSNS (4), /* mulsi */
642 COSTS_N_INSNS (4), /* mulsi_const */
643 COSTS_N_INSNS (4), /* mulsi_const9 */
644 COSTS_N_INSNS (4), /* muldi */
645 COSTS_N_INSNS (11), /* divsi */
646 COSTS_N_INSNS (11), /* divdi */
647 COSTS_N_INSNS (6), /* fp */
648 COSTS_N_INSNS (6), /* dmul */
649 COSTS_N_INSNS (19), /* sdiv */
650 COSTS_N_INSNS (33), /* ddiv */
651 32, /* l1 cache line size */
655 0, /* SF->DF convert */
658 /* Instruction costs on PPC601 processors. */
660 struct processor_costs ppc601_cost
= {
661 COSTS_N_INSNS (5), /* mulsi */
662 COSTS_N_INSNS (5), /* mulsi_const */
663 COSTS_N_INSNS (5), /* mulsi_const9 */
664 COSTS_N_INSNS (5), /* muldi */
665 COSTS_N_INSNS (36), /* divsi */
666 COSTS_N_INSNS (36), /* divdi */
667 COSTS_N_INSNS (4), /* fp */
668 COSTS_N_INSNS (5), /* dmul */
669 COSTS_N_INSNS (17), /* sdiv */
670 COSTS_N_INSNS (31), /* ddiv */
671 32, /* cache line size */
675 0, /* SF->DF convert */
678 /* Instruction costs on PPC603 processors. */
680 struct processor_costs ppc603_cost
= {
681 COSTS_N_INSNS (5), /* mulsi */
682 COSTS_N_INSNS (3), /* mulsi_const */
683 COSTS_N_INSNS (2), /* mulsi_const9 */
684 COSTS_N_INSNS (5), /* muldi */
685 COSTS_N_INSNS (37), /* divsi */
686 COSTS_N_INSNS (37), /* divdi */
687 COSTS_N_INSNS (3), /* fp */
688 COSTS_N_INSNS (4), /* dmul */
689 COSTS_N_INSNS (18), /* sdiv */
690 COSTS_N_INSNS (33), /* ddiv */
691 32, /* cache line size */
695 0, /* SF->DF convert */
698 /* Instruction costs on PPC604 processors. */
700 struct processor_costs ppc604_cost
= {
701 COSTS_N_INSNS (4), /* mulsi */
702 COSTS_N_INSNS (4), /* mulsi_const */
703 COSTS_N_INSNS (4), /* mulsi_const9 */
704 COSTS_N_INSNS (4), /* muldi */
705 COSTS_N_INSNS (20), /* divsi */
706 COSTS_N_INSNS (20), /* divdi */
707 COSTS_N_INSNS (3), /* fp */
708 COSTS_N_INSNS (3), /* dmul */
709 COSTS_N_INSNS (18), /* sdiv */
710 COSTS_N_INSNS (32), /* ddiv */
711 32, /* cache line size */
715 0, /* SF->DF convert */
718 /* Instruction costs on PPC604e processors. */
720 struct processor_costs ppc604e_cost
= {
721 COSTS_N_INSNS (2), /* mulsi */
722 COSTS_N_INSNS (2), /* mulsi_const */
723 COSTS_N_INSNS (2), /* mulsi_const9 */
724 COSTS_N_INSNS (2), /* muldi */
725 COSTS_N_INSNS (20), /* divsi */
726 COSTS_N_INSNS (20), /* divdi */
727 COSTS_N_INSNS (3), /* fp */
728 COSTS_N_INSNS (3), /* dmul */
729 COSTS_N_INSNS (18), /* sdiv */
730 COSTS_N_INSNS (32), /* ddiv */
731 32, /* cache line size */
735 0, /* SF->DF convert */
738 /* Instruction costs on PPC620 processors. */
740 struct processor_costs ppc620_cost
= {
741 COSTS_N_INSNS (5), /* mulsi */
742 COSTS_N_INSNS (4), /* mulsi_const */
743 COSTS_N_INSNS (3), /* mulsi_const9 */
744 COSTS_N_INSNS (7), /* muldi */
745 COSTS_N_INSNS (21), /* divsi */
746 COSTS_N_INSNS (37), /* divdi */
747 COSTS_N_INSNS (3), /* fp */
748 COSTS_N_INSNS (3), /* dmul */
749 COSTS_N_INSNS (18), /* sdiv */
750 COSTS_N_INSNS (32), /* ddiv */
751 128, /* cache line size */
755 0, /* SF->DF convert */
758 /* Instruction costs on PPC630 processors. */
760 struct processor_costs ppc630_cost
= {
761 COSTS_N_INSNS (5), /* mulsi */
762 COSTS_N_INSNS (4), /* mulsi_const */
763 COSTS_N_INSNS (3), /* mulsi_const9 */
764 COSTS_N_INSNS (7), /* muldi */
765 COSTS_N_INSNS (21), /* divsi */
766 COSTS_N_INSNS (37), /* divdi */
767 COSTS_N_INSNS (3), /* fp */
768 COSTS_N_INSNS (3), /* dmul */
769 COSTS_N_INSNS (17), /* sdiv */
770 COSTS_N_INSNS (21), /* ddiv */
771 128, /* cache line size */
775 0, /* SF->DF convert */
778 /* Instruction costs on Cell processor. */
779 /* COSTS_N_INSNS (1) ~ one add. */
781 struct processor_costs ppccell_cost
= {
782 COSTS_N_INSNS (9/2)+2, /* mulsi */
783 COSTS_N_INSNS (6/2), /* mulsi_const */
784 COSTS_N_INSNS (6/2), /* mulsi_const9 */
785 COSTS_N_INSNS (15/2)+2, /* muldi */
786 COSTS_N_INSNS (38/2), /* divsi */
787 COSTS_N_INSNS (70/2), /* divdi */
788 COSTS_N_INSNS (10/2), /* fp */
789 COSTS_N_INSNS (10/2), /* dmul */
790 COSTS_N_INSNS (74/2), /* sdiv */
791 COSTS_N_INSNS (74/2), /* ddiv */
792 128, /* cache line size */
796 0, /* SF->DF convert */
799 /* Instruction costs on PPC750 and PPC7400 processors. */
801 struct processor_costs ppc750_cost
= {
802 COSTS_N_INSNS (5), /* mulsi */
803 COSTS_N_INSNS (3), /* mulsi_const */
804 COSTS_N_INSNS (2), /* mulsi_const9 */
805 COSTS_N_INSNS (5), /* muldi */
806 COSTS_N_INSNS (17), /* divsi */
807 COSTS_N_INSNS (17), /* divdi */
808 COSTS_N_INSNS (3), /* fp */
809 COSTS_N_INSNS (3), /* dmul */
810 COSTS_N_INSNS (17), /* sdiv */
811 COSTS_N_INSNS (31), /* ddiv */
812 32, /* cache line size */
816 0, /* SF->DF convert */
819 /* Instruction costs on PPC7450 processors. */
821 struct processor_costs ppc7450_cost
= {
822 COSTS_N_INSNS (4), /* mulsi */
823 COSTS_N_INSNS (3), /* mulsi_const */
824 COSTS_N_INSNS (3), /* mulsi_const9 */
825 COSTS_N_INSNS (4), /* muldi */
826 COSTS_N_INSNS (23), /* divsi */
827 COSTS_N_INSNS (23), /* divdi */
828 COSTS_N_INSNS (5), /* fp */
829 COSTS_N_INSNS (5), /* dmul */
830 COSTS_N_INSNS (21), /* sdiv */
831 COSTS_N_INSNS (35), /* ddiv */
832 32, /* cache line size */
836 0, /* SF->DF convert */
839 /* Instruction costs on PPC8540 processors. */
841 struct processor_costs ppc8540_cost
= {
842 COSTS_N_INSNS (4), /* mulsi */
843 COSTS_N_INSNS (4), /* mulsi_const */
844 COSTS_N_INSNS (4), /* mulsi_const9 */
845 COSTS_N_INSNS (4), /* muldi */
846 COSTS_N_INSNS (19), /* divsi */
847 COSTS_N_INSNS (19), /* divdi */
848 COSTS_N_INSNS (4), /* fp */
849 COSTS_N_INSNS (4), /* dmul */
850 COSTS_N_INSNS (29), /* sdiv */
851 COSTS_N_INSNS (29), /* ddiv */
852 32, /* cache line size */
855 1, /* prefetch streams /*/
856 0, /* SF->DF convert */
859 /* Instruction costs on E300C2 and E300C3 cores. */
861 struct processor_costs ppce300c2c3_cost
= {
862 COSTS_N_INSNS (4), /* mulsi */
863 COSTS_N_INSNS (4), /* mulsi_const */
864 COSTS_N_INSNS (4), /* mulsi_const9 */
865 COSTS_N_INSNS (4), /* muldi */
866 COSTS_N_INSNS (19), /* divsi */
867 COSTS_N_INSNS (19), /* divdi */
868 COSTS_N_INSNS (3), /* fp */
869 COSTS_N_INSNS (4), /* dmul */
870 COSTS_N_INSNS (18), /* sdiv */
871 COSTS_N_INSNS (33), /* ddiv */
875 1, /* prefetch streams /*/
876 0, /* SF->DF convert */
879 /* Instruction costs on PPCE500MC processors. */
881 struct processor_costs ppce500mc_cost
= {
882 COSTS_N_INSNS (4), /* mulsi */
883 COSTS_N_INSNS (4), /* mulsi_const */
884 COSTS_N_INSNS (4), /* mulsi_const9 */
885 COSTS_N_INSNS (4), /* muldi */
886 COSTS_N_INSNS (14), /* divsi */
887 COSTS_N_INSNS (14), /* divdi */
888 COSTS_N_INSNS (8), /* fp */
889 COSTS_N_INSNS (10), /* dmul */
890 COSTS_N_INSNS (36), /* sdiv */
891 COSTS_N_INSNS (66), /* ddiv */
892 64, /* cache line size */
895 1, /* prefetch streams /*/
896 0, /* SF->DF convert */
899 /* Instruction costs on PPCE500MC64 processors. */
901 struct processor_costs ppce500mc64_cost
= {
902 COSTS_N_INSNS (4), /* mulsi */
903 COSTS_N_INSNS (4), /* mulsi_const */
904 COSTS_N_INSNS (4), /* mulsi_const9 */
905 COSTS_N_INSNS (4), /* muldi */
906 COSTS_N_INSNS (14), /* divsi */
907 COSTS_N_INSNS (14), /* divdi */
908 COSTS_N_INSNS (4), /* fp */
909 COSTS_N_INSNS (10), /* dmul */
910 COSTS_N_INSNS (36), /* sdiv */
911 COSTS_N_INSNS (66), /* ddiv */
912 64, /* cache line size */
915 1, /* prefetch streams /*/
916 0, /* SF->DF convert */
919 /* Instruction costs on PPCE5500 processors. */
921 struct processor_costs ppce5500_cost
= {
922 COSTS_N_INSNS (5), /* mulsi */
923 COSTS_N_INSNS (5), /* mulsi_const */
924 COSTS_N_INSNS (4), /* mulsi_const9 */
925 COSTS_N_INSNS (5), /* muldi */
926 COSTS_N_INSNS (14), /* divsi */
927 COSTS_N_INSNS (14), /* divdi */
928 COSTS_N_INSNS (7), /* fp */
929 COSTS_N_INSNS (10), /* dmul */
930 COSTS_N_INSNS (36), /* sdiv */
931 COSTS_N_INSNS (66), /* ddiv */
932 64, /* cache line size */
935 1, /* prefetch streams /*/
936 0, /* SF->DF convert */
939 /* Instruction costs on PPCE6500 processors. */
941 struct processor_costs ppce6500_cost
= {
942 COSTS_N_INSNS (5), /* mulsi */
943 COSTS_N_INSNS (5), /* mulsi_const */
944 COSTS_N_INSNS (4), /* mulsi_const9 */
945 COSTS_N_INSNS (5), /* muldi */
946 COSTS_N_INSNS (14), /* divsi */
947 COSTS_N_INSNS (14), /* divdi */
948 COSTS_N_INSNS (7), /* fp */
949 COSTS_N_INSNS (10), /* dmul */
950 COSTS_N_INSNS (36), /* sdiv */
951 COSTS_N_INSNS (66), /* ddiv */
952 64, /* cache line size */
955 1, /* prefetch streams /*/
956 0, /* SF->DF convert */
959 /* Instruction costs on AppliedMicro Titan processors. */
961 struct processor_costs titan_cost
= {
962 COSTS_N_INSNS (5), /* mulsi */
963 COSTS_N_INSNS (5), /* mulsi_const */
964 COSTS_N_INSNS (5), /* mulsi_const9 */
965 COSTS_N_INSNS (5), /* muldi */
966 COSTS_N_INSNS (18), /* divsi */
967 COSTS_N_INSNS (18), /* divdi */
968 COSTS_N_INSNS (10), /* fp */
969 COSTS_N_INSNS (10), /* dmul */
970 COSTS_N_INSNS (46), /* sdiv */
971 COSTS_N_INSNS (72), /* ddiv */
972 32, /* cache line size */
975 1, /* prefetch streams /*/
976 0, /* SF->DF convert */
979 /* Instruction costs on POWER4 and POWER5 processors. */
981 struct processor_costs power4_cost
= {
982 COSTS_N_INSNS (3), /* mulsi */
983 COSTS_N_INSNS (2), /* mulsi_const */
984 COSTS_N_INSNS (2), /* mulsi_const9 */
985 COSTS_N_INSNS (4), /* muldi */
986 COSTS_N_INSNS (18), /* divsi */
987 COSTS_N_INSNS (34), /* divdi */
988 COSTS_N_INSNS (3), /* fp */
989 COSTS_N_INSNS (3), /* dmul */
990 COSTS_N_INSNS (17), /* sdiv */
991 COSTS_N_INSNS (17), /* ddiv */
992 128, /* cache line size */
995 8, /* prefetch streams /*/
996 0, /* SF->DF convert */
999 /* Instruction costs on POWER6 processors. */
1001 struct processor_costs power6_cost
= {
1002 COSTS_N_INSNS (8), /* mulsi */
1003 COSTS_N_INSNS (8), /* mulsi_const */
1004 COSTS_N_INSNS (8), /* mulsi_const9 */
1005 COSTS_N_INSNS (8), /* muldi */
1006 COSTS_N_INSNS (22), /* divsi */
1007 COSTS_N_INSNS (28), /* divdi */
1008 COSTS_N_INSNS (3), /* fp */
1009 COSTS_N_INSNS (3), /* dmul */
1010 COSTS_N_INSNS (13), /* sdiv */
1011 COSTS_N_INSNS (16), /* ddiv */
1012 128, /* cache line size */
1014 2048, /* l2 cache */
1015 16, /* prefetch streams */
1016 0, /* SF->DF convert */
1019 /* Instruction costs on POWER7 processors. */
1021 struct processor_costs power7_cost
= {
1022 COSTS_N_INSNS (2), /* mulsi */
1023 COSTS_N_INSNS (2), /* mulsi_const */
1024 COSTS_N_INSNS (2), /* mulsi_const9 */
1025 COSTS_N_INSNS (2), /* muldi */
1026 COSTS_N_INSNS (18), /* divsi */
1027 COSTS_N_INSNS (34), /* divdi */
1028 COSTS_N_INSNS (3), /* fp */
1029 COSTS_N_INSNS (3), /* dmul */
1030 COSTS_N_INSNS (13), /* sdiv */
1031 COSTS_N_INSNS (16), /* ddiv */
1032 128, /* cache line size */
1035 12, /* prefetch streams */
1036 COSTS_N_INSNS (3), /* SF->DF convert */
1039 /* Instruction costs on POWER8 processors. */
1041 struct processor_costs power8_cost
= {
1042 COSTS_N_INSNS (3), /* mulsi */
1043 COSTS_N_INSNS (3), /* mulsi_const */
1044 COSTS_N_INSNS (3), /* mulsi_const9 */
1045 COSTS_N_INSNS (3), /* muldi */
1046 COSTS_N_INSNS (19), /* divsi */
1047 COSTS_N_INSNS (35), /* divdi */
1048 COSTS_N_INSNS (3), /* fp */
1049 COSTS_N_INSNS (3), /* dmul */
1050 COSTS_N_INSNS (14), /* sdiv */
1051 COSTS_N_INSNS (17), /* ddiv */
1052 128, /* cache line size */
1055 12, /* prefetch streams */
1056 COSTS_N_INSNS (3), /* SF->DF convert */
1059 /* Instruction costs on POWER9 processors. */
1061 struct processor_costs power9_cost
= {
1062 COSTS_N_INSNS (3), /* mulsi */
1063 COSTS_N_INSNS (3), /* mulsi_const */
1064 COSTS_N_INSNS (3), /* mulsi_const9 */
1065 COSTS_N_INSNS (3), /* muldi */
1066 COSTS_N_INSNS (8), /* divsi */
1067 COSTS_N_INSNS (12), /* divdi */
1068 COSTS_N_INSNS (3), /* fp */
1069 COSTS_N_INSNS (3), /* dmul */
1070 COSTS_N_INSNS (13), /* sdiv */
1071 COSTS_N_INSNS (18), /* ddiv */
1072 128, /* cache line size */
1075 8, /* prefetch streams */
1076 COSTS_N_INSNS (3), /* SF->DF convert */
1079 /* Instruction costs on POWER A2 processors. */
1081 struct processor_costs ppca2_cost
= {
1082 COSTS_N_INSNS (16), /* mulsi */
1083 COSTS_N_INSNS (16), /* mulsi_const */
1084 COSTS_N_INSNS (16), /* mulsi_const9 */
1085 COSTS_N_INSNS (16), /* muldi */
1086 COSTS_N_INSNS (22), /* divsi */
1087 COSTS_N_INSNS (28), /* divdi */
1088 COSTS_N_INSNS (3), /* fp */
1089 COSTS_N_INSNS (3), /* dmul */
1090 COSTS_N_INSNS (59), /* sdiv */
1091 COSTS_N_INSNS (72), /* ddiv */
1094 2048, /* l2 cache */
1095 16, /* prefetch streams */
1096 0, /* SF->DF convert */
1099 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1100 static tree (*rs6000_veclib_handler
) (combined_fn
, tree
, tree
);
1103 static bool rs6000_debug_legitimate_address_p (machine_mode
, rtx
, bool);
1104 static tree
rs6000_handle_longcall_attribute (tree
*, tree
, tree
, int, bool *);
1105 static tree
rs6000_handle_altivec_attribute (tree
*, tree
, tree
, int, bool *);
1106 static tree
rs6000_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1107 static tree
rs6000_builtin_vectorized_libmass (combined_fn
, tree
, tree
);
1108 static void rs6000_emit_set_long_const (rtx
, HOST_WIDE_INT
);
1109 static int rs6000_memory_move_cost (machine_mode
, reg_class_t
, bool);
1110 static bool rs6000_debug_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
1111 static int rs6000_debug_address_cost (rtx
, machine_mode
, addr_space_t
,
1113 static int rs6000_debug_adjust_cost (rtx_insn
*, int, rtx_insn
*, int,
1115 static bool is_microcoded_insn (rtx_insn
*);
1116 static bool is_nonpipeline_insn (rtx_insn
*);
1117 static bool is_cracked_insn (rtx_insn
*);
1118 static bool is_load_insn (rtx
, rtx
*);
1119 static bool is_store_insn (rtx
, rtx
*);
1120 static bool set_to_load_agen (rtx_insn
*,rtx_insn
*);
1121 static bool insn_terminates_group_p (rtx_insn
*, enum group_termination
);
1122 static bool insn_must_be_first_in_group (rtx_insn
*);
1123 static bool insn_must_be_last_in_group (rtx_insn
*);
1124 int easy_vector_constant (rtx
, machine_mode
);
1125 static rtx
rs6000_debug_legitimize_address (rtx
, rtx
, machine_mode
);
1126 static rtx
rs6000_legitimize_tls_address (rtx
, enum tls_model
);
1128 static tree
get_prev_label (tree
);
1130 static bool rs6000_mode_dependent_address (const_rtx
);
1131 static bool rs6000_debug_mode_dependent_address (const_rtx
);
1132 static bool rs6000_offsettable_memref_p (rtx
, machine_mode
, bool);
1133 static enum reg_class
rs6000_secondary_reload_class (enum reg_class
,
1135 static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class
,
1138 static enum reg_class
rs6000_preferred_reload_class (rtx
, enum reg_class
);
1139 static enum reg_class
rs6000_debug_preferred_reload_class (rtx
,
1141 static bool rs6000_debug_secondary_memory_needed (machine_mode
,
1144 static bool rs6000_debug_can_change_mode_class (machine_mode
,
1148 static bool (*rs6000_mode_dependent_address_ptr
) (const_rtx
)
1149 = rs6000_mode_dependent_address
;
1151 enum reg_class (*rs6000_secondary_reload_class_ptr
) (enum reg_class
,
1153 = rs6000_secondary_reload_class
;
1155 enum reg_class (*rs6000_preferred_reload_class_ptr
) (rtx
, enum reg_class
)
1156 = rs6000_preferred_reload_class
;
1158 const int INSN_NOT_AVAILABLE
= -1;
1160 static void rs6000_print_isa_options (FILE *, int, const char *,
1162 static void rs6000_print_builtin_options (FILE *, int, const char *,
1164 static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void);
1166 static enum rs6000_reg_type
register_to_reg_type (rtx
, bool *);
1167 static bool rs6000_secondary_reload_move (enum rs6000_reg_type
,
1168 enum rs6000_reg_type
,
1170 secondary_reload_info
*,
1172 static enum non_prefixed_form
reg_to_non_prefixed (rtx reg
, machine_mode mode
);
1173 rtl_opt_pass
*make_pass_analyze_swaps (gcc::context
*);
1175 /* Hash table stuff for keeping track of TOC entries. */
1177 struct GTY((for_user
)) toc_hash_struct
1179 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1180 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1182 machine_mode key_mode
;
1186 struct toc_hasher
: ggc_ptr_hash
<toc_hash_struct
>
1188 static hashval_t
hash (toc_hash_struct
*);
1189 static bool equal (toc_hash_struct
*, toc_hash_struct
*);
1192 static GTY (()) hash_table
<toc_hasher
> *toc_hash_table
;
1196 /* Default register names. */
1197 char rs6000_reg_names
[][8] =
1200 "0", "1", "2", "3", "4", "5", "6", "7",
1201 "8", "9", "10", "11", "12", "13", "14", "15",
1202 "16", "17", "18", "19", "20", "21", "22", "23",
1203 "24", "25", "26", "27", "28", "29", "30", "31",
1205 "0", "1", "2", "3", "4", "5", "6", "7",
1206 "8", "9", "10", "11", "12", "13", "14", "15",
1207 "16", "17", "18", "19", "20", "21", "22", "23",
1208 "24", "25", "26", "27", "28", "29", "30", "31",
1210 "0", "1", "2", "3", "4", "5", "6", "7",
1211 "8", "9", "10", "11", "12", "13", "14", "15",
1212 "16", "17", "18", "19", "20", "21", "22", "23",
1213 "24", "25", "26", "27", "28", "29", "30", "31",
1215 "lr", "ctr", "ca", "ap",
1217 "0", "1", "2", "3", "4", "5", "6", "7",
1218 /* vrsave vscr sfp */
1219 "vrsave", "vscr", "sfp",
1222 #ifdef TARGET_REGNAMES
1223 static const char alt_reg_names
[][8] =
1226 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1227 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1228 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1229 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1231 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1232 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1233 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1234 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1236 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1237 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1238 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1239 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1241 "lr", "ctr", "ca", "ap",
1243 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1244 /* vrsave vscr sfp */
1245 "vrsave", "vscr", "sfp",
1249 /* Table of valid machine attributes. */
1251 static const struct attribute_spec rs6000_attribute_table
[] =
1253 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1254 affects_type_identity, handler, exclude } */
1255 { "altivec", 1, 1, false, true, false, false,
1256 rs6000_handle_altivec_attribute
, NULL
},
1257 { "longcall", 0, 0, false, true, true, false,
1258 rs6000_handle_longcall_attribute
, NULL
},
1259 { "shortcall", 0, 0, false, true, true, false,
1260 rs6000_handle_longcall_attribute
, NULL
},
1261 { "ms_struct", 0, 0, false, false, false, false,
1262 rs6000_handle_struct_attribute
, NULL
},
1263 { "gcc_struct", 0, 0, false, false, false, false,
1264 rs6000_handle_struct_attribute
, NULL
},
1265 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1266 SUBTARGET_ATTRIBUTE_TABLE
,
1268 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
1271 #ifndef TARGET_PROFILE_KERNEL
1272 #define TARGET_PROFILE_KERNEL 0
1275 /* Initialize the GCC target structure. */
1276 #undef TARGET_ATTRIBUTE_TABLE
1277 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1278 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1279 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1280 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1281 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1283 #undef TARGET_ASM_ALIGNED_DI_OP
1284 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1286 /* Default unaligned ops are only provided for ELF. Find the ops needed
1287 for non-ELF systems. */
1288 #ifndef OBJECT_FORMAT_ELF
1290 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1292 #undef TARGET_ASM_UNALIGNED_HI_OP
1293 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1294 #undef TARGET_ASM_UNALIGNED_SI_OP
1295 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1296 #undef TARGET_ASM_UNALIGNED_DI_OP
1297 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1300 #undef TARGET_ASM_UNALIGNED_HI_OP
1301 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1302 #undef TARGET_ASM_UNALIGNED_SI_OP
1303 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1304 #undef TARGET_ASM_UNALIGNED_DI_OP
1305 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1306 #undef TARGET_ASM_ALIGNED_DI_OP
1307 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1311 /* This hook deals with fixups for relocatable code and DI-mode objects
1313 #undef TARGET_ASM_INTEGER
1314 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1316 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1317 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1318 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1321 #undef TARGET_SET_UP_BY_PROLOGUE
1322 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1324 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1325 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1326 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1327 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1328 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1329 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1330 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1331 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1332 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1333 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1334 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1335 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1337 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1338 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1340 #undef TARGET_INTERNAL_ARG_POINTER
1341 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1343 #undef TARGET_HAVE_TLS
1344 #define TARGET_HAVE_TLS HAVE_AS_TLS
1346 #undef TARGET_CANNOT_FORCE_CONST_MEM
1347 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1349 #undef TARGET_DELEGITIMIZE_ADDRESS
1350 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1352 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1353 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1355 #undef TARGET_LEGITIMATE_COMBINED_INSN
1356 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1358 #undef TARGET_ASM_FUNCTION_PROLOGUE
1359 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1360 #undef TARGET_ASM_FUNCTION_EPILOGUE
1361 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1363 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1364 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1366 #undef TARGET_LEGITIMIZE_ADDRESS
1367 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1369 #undef TARGET_SCHED_VARIABLE_ISSUE
1370 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1372 #undef TARGET_SCHED_ISSUE_RATE
1373 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1374 #undef TARGET_SCHED_ADJUST_COST
1375 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1376 #undef TARGET_SCHED_ADJUST_PRIORITY
1377 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1378 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1379 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1380 #undef TARGET_SCHED_INIT
1381 #define TARGET_SCHED_INIT rs6000_sched_init
1382 #undef TARGET_SCHED_FINISH
1383 #define TARGET_SCHED_FINISH rs6000_sched_finish
1384 #undef TARGET_SCHED_REORDER
1385 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1386 #undef TARGET_SCHED_REORDER2
1387 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1389 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1390 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1392 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1393 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1395 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1396 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1397 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1398 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1399 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1400 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1401 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1402 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1404 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1405 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1407 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1408 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1409 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1410 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1411 rs6000_builtin_support_vector_misalignment
1412 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1413 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1414 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1415 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1416 rs6000_builtin_vectorization_cost
1417 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1418 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1419 rs6000_preferred_simd_mode
1420 #undef TARGET_VECTORIZE_INIT_COST
1421 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1422 #undef TARGET_VECTORIZE_ADD_STMT_COST
1423 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1424 #undef TARGET_VECTORIZE_FINISH_COST
1425 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1426 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1427 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1429 #undef TARGET_LOOP_UNROLL_ADJUST
1430 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1432 #undef TARGET_INIT_BUILTINS
1433 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1434 #undef TARGET_BUILTIN_DECL
1435 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1437 #undef TARGET_FOLD_BUILTIN
1438 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1439 #undef TARGET_GIMPLE_FOLD_BUILTIN
1440 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1442 #undef TARGET_EXPAND_BUILTIN
1443 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1445 #undef TARGET_MANGLE_TYPE
1446 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1448 #undef TARGET_INIT_LIBFUNCS
1449 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1452 #undef TARGET_BINDS_LOCAL_P
1453 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1456 #undef TARGET_MS_BITFIELD_LAYOUT_P
1457 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1459 #undef TARGET_ASM_OUTPUT_MI_THUNK
1460 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1462 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1463 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1465 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1466 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1468 #undef TARGET_REGISTER_MOVE_COST
1469 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1470 #undef TARGET_MEMORY_MOVE_COST
1471 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1472 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1473 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1474 rs6000_ira_change_pseudo_allocno_class
1475 #undef TARGET_CANNOT_COPY_INSN_P
1476 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1477 #undef TARGET_RTX_COSTS
1478 #define TARGET_RTX_COSTS rs6000_rtx_costs
1479 #undef TARGET_ADDRESS_COST
1480 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1481 #undef TARGET_INSN_COST
1482 #define TARGET_INSN_COST rs6000_insn_cost
1484 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1485 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1487 #undef TARGET_PROMOTE_FUNCTION_MODE
1488 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1490 #undef TARGET_RETURN_IN_MEMORY
1491 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1493 #undef TARGET_RETURN_IN_MSB
1494 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1496 #undef TARGET_SETUP_INCOMING_VARARGS
1497 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1499 /* Always strict argument naming on rs6000. */
1500 #undef TARGET_STRICT_ARGUMENT_NAMING
1501 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1502 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1503 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1504 #undef TARGET_SPLIT_COMPLEX_ARG
1505 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1506 #undef TARGET_MUST_PASS_IN_STACK
1507 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1508 #undef TARGET_PASS_BY_REFERENCE
1509 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1510 #undef TARGET_ARG_PARTIAL_BYTES
1511 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1512 #undef TARGET_FUNCTION_ARG_ADVANCE
1513 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1514 #undef TARGET_FUNCTION_ARG
1515 #define TARGET_FUNCTION_ARG rs6000_function_arg
1516 #undef TARGET_FUNCTION_ARG_PADDING
1517 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1518 #undef TARGET_FUNCTION_ARG_BOUNDARY
1519 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1521 #undef TARGET_BUILD_BUILTIN_VA_LIST
1522 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1524 #undef TARGET_EXPAND_BUILTIN_VA_START
1525 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1527 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1528 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1530 #undef TARGET_EH_RETURN_FILTER_MODE
1531 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1533 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1534 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1536 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1537 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1539 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1540 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1542 #undef TARGET_FLOATN_MODE
1543 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1545 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1546 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1548 #undef TARGET_MD_ASM_ADJUST
1549 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1551 #undef TARGET_OPTION_OVERRIDE
1552 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1554 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1555 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1556 rs6000_builtin_vectorized_function
1558 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1559 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1560 rs6000_builtin_md_vectorized_function
1562 #undef TARGET_STACK_PROTECT_GUARD
1563 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1566 #undef TARGET_STACK_PROTECT_FAIL
1567 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1571 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1572 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1575 /* Use a 32-bit anchor range. This leads to sequences like:
1577 addis tmp,anchor,high
1580 where tmp itself acts as an anchor, and can be shared between
1581 accesses to the same 64k page. */
1582 #undef TARGET_MIN_ANCHOR_OFFSET
1583 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1584 #undef TARGET_MAX_ANCHOR_OFFSET
1585 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1586 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1587 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1588 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1589 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1591 #undef TARGET_BUILTIN_RECIPROCAL
1592 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1594 #undef TARGET_SECONDARY_RELOAD
1595 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1596 #undef TARGET_SECONDARY_MEMORY_NEEDED
1597 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1598 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1599 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1601 #undef TARGET_LEGITIMATE_ADDRESS_P
1602 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1604 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1605 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1607 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1608 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1610 #undef TARGET_CAN_ELIMINATE
1611 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1613 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1614 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1616 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1617 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1619 #undef TARGET_TRAMPOLINE_INIT
1620 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1622 #undef TARGET_FUNCTION_VALUE
1623 #define TARGET_FUNCTION_VALUE rs6000_function_value
1625 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1626 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1628 #undef TARGET_OPTION_SAVE
1629 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1631 #undef TARGET_OPTION_RESTORE
1632 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1634 #undef TARGET_OPTION_PRINT
1635 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1637 #undef TARGET_CAN_INLINE_P
1638 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1640 #undef TARGET_SET_CURRENT_FUNCTION
1641 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1643 #undef TARGET_LEGITIMATE_CONSTANT_P
1644 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1646 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1647 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1649 #undef TARGET_CAN_USE_DOLOOP_P
1650 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1652 #undef TARGET_PREDICT_DOLOOP_P
1653 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1655 #undef TARGET_HAVE_COUNT_REG_DECR_P
1656 #define TARGET_HAVE_COUNT_REG_DECR_P true
1658 /* 1000000000 is infinite cost in IVOPTs. */
1659 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1660 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1662 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1663 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1665 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1666 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1668 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1669 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1670 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1671 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1672 #undef TARGET_UNWIND_WORD_MODE
1673 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1675 #undef TARGET_OFFLOAD_OPTIONS
1676 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1678 #undef TARGET_C_MODE_FOR_SUFFIX
1679 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1681 #undef TARGET_INVALID_BINARY_OP
1682 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1684 #undef TARGET_OPTAB_SUPPORTED_P
1685 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1687 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1688 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1690 #undef TARGET_COMPARE_VERSION_PRIORITY
1691 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1693 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1694 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1695 rs6000_generate_version_dispatcher_body
1697 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1698 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1699 rs6000_get_function_versions_dispatcher
1701 #undef TARGET_OPTION_FUNCTION_VERSIONS
1702 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1704 #undef TARGET_HARD_REGNO_NREGS
1705 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1706 #undef TARGET_HARD_REGNO_MODE_OK
1707 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1709 #undef TARGET_MODES_TIEABLE_P
1710 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1712 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1713 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1714 rs6000_hard_regno_call_part_clobbered
1716 #undef TARGET_SLOW_UNALIGNED_ACCESS
1717 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1719 #undef TARGET_CAN_CHANGE_MODE_CLASS
1720 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1722 #undef TARGET_CONSTANT_ALIGNMENT
1723 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1725 #undef TARGET_STARTING_FRAME_OFFSET
1726 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1728 #if TARGET_ELF && RS6000_WEAK
1729 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1730 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1733 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1734 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1736 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1737 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1740 /* Processor table. */
1743 const char *const name
; /* Canonical processor name. */
1744 const enum processor_type processor
; /* Processor type enum value. */
1745 const HOST_WIDE_INT target_enable
; /* Target flags to enable. */
1748 static struct rs6000_ptt
const processor_target_table
[] =
1750 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1751 #include "rs6000-cpus.def"
1755 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1759 rs6000_cpu_name_lookup (const char *name
)
1765 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
1766 if (! strcmp (name
, processor_target_table
[i
].name
))
1774 /* Return number of consecutive hard regs needed starting at reg REGNO
1775 to hold something of mode MODE.
1776 This is ordinarily the length in words of a value of mode MODE
1777 but can be less for certain modes in special long registers.
1779 POWER and PowerPC GPRs hold 32 bits worth;
1780 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
1783 rs6000_hard_regno_nregs_internal (int regno
, machine_mode mode
)
1785 unsigned HOST_WIDE_INT reg_size
;
1787 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1788 128-bit floating point that can go in vector registers, which has VSX
1789 memory addressing. */
1790 if (FP_REGNO_P (regno
))
1791 reg_size
= (VECTOR_MEM_VSX_P (mode
) || FLOAT128_VECTOR_P (mode
)
1792 ? UNITS_PER_VSX_WORD
1793 : UNITS_PER_FP_WORD
);
1795 else if (ALTIVEC_REGNO_P (regno
))
1796 reg_size
= UNITS_PER_ALTIVEC_WORD
;
1799 reg_size
= UNITS_PER_WORD
;
1801 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
1804 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1807 rs6000_hard_regno_mode_ok_uncached (int regno
, machine_mode mode
)
1809 int last_regno
= regno
+ rs6000_hard_regno_nregs
[mode
][regno
] - 1;
1811 if (COMPLEX_MODE_P (mode
))
1812 mode
= GET_MODE_INNER (mode
);
1814 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1815 register combinations, and use PTImode where we need to deal with quad
1816 word memory operations. Don't allow quad words in the argument or frame
1817 pointer registers, just registers 0..31. */
1818 if (mode
== PTImode
)
1819 return (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1820 && IN_RANGE (last_regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1821 && ((regno
& 1) == 0));
1823 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1824 implementations. Don't allow an item to be split between a FP register
1825 and an Altivec register. Allow TImode in all VSX registers if the user
1827 if (TARGET_VSX
&& VSX_REGNO_P (regno
)
1828 && (VECTOR_MEM_VSX_P (mode
)
1829 || FLOAT128_VECTOR_P (mode
)
1830 || reg_addr
[mode
].scalar_in_vmx_p
1832 || (TARGET_VADDUQM
&& mode
== V1TImode
)))
1834 if (FP_REGNO_P (regno
))
1835 return FP_REGNO_P (last_regno
);
1837 if (ALTIVEC_REGNO_P (regno
))
1839 if (GET_MODE_SIZE (mode
) != 16 && !reg_addr
[mode
].scalar_in_vmx_p
)
1842 return ALTIVEC_REGNO_P (last_regno
);
1846 /* The GPRs can hold any mode, but values bigger than one register
1847 cannot go past R31. */
1848 if (INT_REGNO_P (regno
))
1849 return INT_REGNO_P (last_regno
);
1851 /* The float registers (except for VSX vector modes) can only hold floating
1852 modes and DImode. */
1853 if (FP_REGNO_P (regno
))
1855 if (FLOAT128_VECTOR_P (mode
))
1858 if (SCALAR_FLOAT_MODE_P (mode
)
1859 && (mode
!= TDmode
|| (regno
% 2) == 0)
1860 && FP_REGNO_P (last_regno
))
1863 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1865 if(GET_MODE_SIZE (mode
) == UNITS_PER_FP_WORD
)
1868 if (TARGET_P8_VECTOR
&& (mode
== SImode
))
1871 if (TARGET_P9_VECTOR
&& (mode
== QImode
|| mode
== HImode
))
1878 /* The CR register can only hold CC modes. */
1879 if (CR_REGNO_P (regno
))
1880 return GET_MODE_CLASS (mode
) == MODE_CC
;
1882 if (CA_REGNO_P (regno
))
1883 return mode
== Pmode
|| mode
== SImode
;
1885 /* AltiVec only in AldyVec registers. */
1886 if (ALTIVEC_REGNO_P (regno
))
1887 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
1888 || mode
== V1TImode
);
1890 /* We cannot put non-VSX TImode or PTImode anywhere except general register
1891 and it must be able to fit within the register set. */
1893 return GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
;
1896 /* Implement TARGET_HARD_REGNO_NREGS. */
1899 rs6000_hard_regno_nregs_hook (unsigned int regno
, machine_mode mode
)
1901 return rs6000_hard_regno_nregs
[mode
][regno
];
1904 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1907 rs6000_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
1909 return rs6000_hard_regno_mode_ok_p
[mode
][regno
];
1912 /* Implement TARGET_MODES_TIEABLE_P.
1914 PTImode cannot tie with other modes because PTImode is restricted to even
1915 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1918 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1919 128-bit floating point on VSX systems ties with other vectors. */
1922 rs6000_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
1924 if (mode1
== PTImode
)
1925 return mode2
== PTImode
;
1926 if (mode2
== PTImode
)
1929 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1
))
1930 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2
);
1931 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2
))
1934 if (SCALAR_FLOAT_MODE_P (mode1
))
1935 return SCALAR_FLOAT_MODE_P (mode2
);
1936 if (SCALAR_FLOAT_MODE_P (mode2
))
1939 if (GET_MODE_CLASS (mode1
) == MODE_CC
)
1940 return GET_MODE_CLASS (mode2
) == MODE_CC
;
1941 if (GET_MODE_CLASS (mode2
) == MODE_CC
)
1947 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1950 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
1955 && GET_MODE_SIZE (mode
) > 4
1956 && INT_REGNO_P (regno
))
1960 && FP_REGNO_P (regno
)
1961 && GET_MODE_SIZE (mode
) > 8
1962 && !FLOAT128_2REG_P (mode
))
1968 /* Print interesting facts about registers. */
1970 rs6000_debug_reg_print (int first_regno
, int last_regno
, const char *reg_name
)
1974 for (r
= first_regno
; r
<= last_regno
; ++r
)
1976 const char *comma
= "";
1979 if (first_regno
== last_regno
)
1980 fprintf (stderr
, "%s:\t", reg_name
);
1982 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
1985 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
1986 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
1990 fprintf (stderr
, ",\n\t");
1995 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
1996 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
1997 rs6000_hard_regno_nregs
[m
][r
]);
1999 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2004 if (call_used_or_fixed_reg_p (r
))
2008 fprintf (stderr
, ",\n\t");
2013 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2021 fprintf (stderr
, ",\n\t");
2026 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2032 fprintf (stderr
, ",\n\t");
2036 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2037 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2042 fprintf (stderr
, ",\n\t");
2046 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
2051 rs6000_debug_vector_unit (enum rs6000_vector v
)
2057 case VECTOR_NONE
: ret
= "none"; break;
2058 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2059 case VECTOR_VSX
: ret
= "vsx"; break;
2060 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2061 default: ret
= "unknown"; break;
2067 /* Inner function printing just the address mask for a particular reload
2069 DEBUG_FUNCTION
char *
2070 rs6000_debug_addr_mask (addr_mask_type mask
, bool keep_spaces
)
2075 if ((mask
& RELOAD_REG_VALID
) != 0)
2077 else if (keep_spaces
)
2080 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2082 else if (keep_spaces
)
2085 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2087 else if (keep_spaces
)
2090 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2092 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2094 else if (keep_spaces
)
2097 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2099 else if (keep_spaces
)
2102 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2104 else if (keep_spaces
)
2107 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2109 else if (keep_spaces
)
2117 /* Print the address masks in a human readble fashion. */
2119 rs6000_debug_print_mode (ssize_t m
)
2124 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2125 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2126 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2127 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
2129 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2130 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2132 fprintf (stderr
, "%*s Reload=%c%c", spaces
, "",
2133 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2134 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2138 spaces
+= sizeof (" Reload=sl") - 1;
2140 if (reg_addr
[m
].scalar_in_vmx_p
)
2142 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2146 spaces
+= sizeof (" Upper=y") - 1;
2148 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2149 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2151 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2153 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2154 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2157 fputs ("\n", stderr
);
2160 #define DEBUG_FMT_ID "%-32s= "
2161 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2162 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2163 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2165 /* Print various interesting information with -mdebug=reg. */
2167 rs6000_debug_reg_global (void)
2169 static const char *const tf
[2] = { "false", "true" };
2170 const char *nl
= (const char *)0;
2173 char costly_num
[20];
2175 char flags_buffer
[40];
2176 const char *costly_str
;
2177 const char *nop_str
;
2178 const char *trace_str
;
2179 const char *abi_str
;
2180 const char *cmodel_str
;
2181 struct cl_target_option cl_opts
;
2183 /* Modes we want tieable information on. */
2184 static const machine_mode print_tieable_modes
[] = {
2218 /* Virtual regs we are interested in. */
2219 const static struct {
2220 int regno
; /* register number. */
2221 const char *name
; /* register name. */
2222 } virtual_regs
[] = {
2223 { STACK_POINTER_REGNUM
, "stack pointer:" },
2224 { TOC_REGNUM
, "toc: " },
2225 { STATIC_CHAIN_REGNUM
, "static chain: " },
2226 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2227 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2228 { ARG_POINTER_REGNUM
, "arg pointer: " },
2229 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2230 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2231 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2232 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2233 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2234 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2235 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2236 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
2237 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM
, "stack boundry:" },
2238 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2241 fputs ("\nHard register information:\n", stderr
);
2242 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2243 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2244 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2247 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2248 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2249 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2250 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2251 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2252 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2254 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2255 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2256 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2260 "d reg_class = %s\n"
2261 "f reg_class = %s\n"
2262 "v reg_class = %s\n"
2263 "wa reg_class = %s\n"
2264 "we reg_class = %s\n"
2265 "wr reg_class = %s\n"
2266 "wx reg_class = %s\n"
2267 "wA reg_class = %s\n"
2269 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2270 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_f
]],
2271 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2272 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2273 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2274 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2275 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2276 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]]);
2279 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2280 rs6000_debug_print_mode (m
);
2282 fputs ("\n", stderr
);
2284 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2286 machine_mode mode1
= print_tieable_modes
[m1
];
2287 bool first_time
= true;
2289 nl
= (const char *)0;
2290 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2292 machine_mode mode2
= print_tieable_modes
[m2
];
2293 if (mode1
!= mode2
&& rs6000_modes_tieable_p (mode1
, mode2
))
2297 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2302 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2307 fputs ("\n", stderr
);
2313 if (rs6000_recip_control
)
2315 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2317 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2318 if (rs6000_recip_bits
[m
])
2321 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2323 (RS6000_RECIP_AUTO_RE_P (m
)
2325 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2326 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2328 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2331 fputs ("\n", stderr
);
2334 if (rs6000_cpu_index
>= 0)
2336 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2338 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2340 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2341 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2344 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2346 if (rs6000_tune_index
>= 0)
2348 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2350 = processor_target_table
[rs6000_tune_index
].target_enable
;
2352 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2353 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2356 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2358 cl_target_option_save (&cl_opts
, &global_options
);
2359 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2362 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2363 rs6000_isa_flags_explicit
);
2365 rs6000_print_builtin_options (stderr
, 0, "rs6000_builtin_mask",
2366 rs6000_builtin_mask
);
2368 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2370 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2371 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2373 switch (rs6000_sched_costly_dep
)
2375 case max_dep_latency
:
2376 costly_str
= "max_dep_latency";
2380 costly_str
= "no_dep_costly";
2383 case all_deps_costly
:
2384 costly_str
= "all_deps_costly";
2387 case true_store_to_load_dep_costly
:
2388 costly_str
= "true_store_to_load_dep_costly";
2391 case store_to_load_dep_costly
:
2392 costly_str
= "store_to_load_dep_costly";
2396 costly_str
= costly_num
;
2397 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2401 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2403 switch (rs6000_sched_insert_nops
)
2405 case sched_finish_regroup_exact
:
2406 nop_str
= "sched_finish_regroup_exact";
2409 case sched_finish_pad_groups
:
2410 nop_str
= "sched_finish_pad_groups";
2413 case sched_finish_none
:
2414 nop_str
= "sched_finish_none";
2419 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2423 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2425 switch (rs6000_sdata
)
2432 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2436 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2440 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2445 switch (rs6000_traceback
)
2447 case traceback_default
: trace_str
= "default"; break;
2448 case traceback_none
: trace_str
= "none"; break;
2449 case traceback_part
: trace_str
= "part"; break;
2450 case traceback_full
: trace_str
= "full"; break;
2451 default: trace_str
= "unknown"; break;
2454 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2456 switch (rs6000_current_cmodel
)
2458 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2459 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2460 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2461 default: cmodel_str
= "unknown"; break;
2464 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2466 switch (rs6000_current_abi
)
2468 case ABI_NONE
: abi_str
= "none"; break;
2469 case ABI_AIX
: abi_str
= "aix"; break;
2470 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2471 case ABI_V4
: abi_str
= "V4"; break;
2472 case ABI_DARWIN
: abi_str
= "darwin"; break;
2473 default: abi_str
= "unknown"; break;
2476 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2478 if (rs6000_altivec_abi
)
2479 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2481 if (rs6000_darwin64_abi
)
2482 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2484 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2485 (TARGET_SOFT_FLOAT
? "true" : "false"));
2487 if (TARGET_LINK_STACK
)
2488 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2490 if (TARGET_P8_FUSION
)
2494 strcpy (options
, "power8");
2495 if (TARGET_P8_FUSION_SIGN
)
2496 strcat (options
, ", sign");
2498 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2501 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2502 TARGET_SECURE_PLT
? "secure" : "bss");
2503 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2504 aix_struct_return
? "aix" : "sysv");
2505 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2506 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2507 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2508 tf
[!!rs6000_align_branch_targets
]);
2509 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2510 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2511 rs6000_long_double_type_size
);
2512 if (rs6000_long_double_type_size
> 64)
2514 fprintf (stderr
, DEBUG_FMT_S
, "long double type",
2515 TARGET_IEEEQUAD
? "IEEE" : "IBM");
2516 fprintf (stderr
, DEBUG_FMT_S
, "default long double type",
2517 TARGET_IEEEQUAD_DEFAULT
? "IEEE" : "IBM");
2519 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2520 (int)rs6000_sched_restricted_insns_priority
);
2521 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2523 fprintf (stderr
, DEBUG_FMT_D
, "Number of rs6000 builtins",
2524 (int)RS6000_BUILTIN_COUNT
);
2526 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2527 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2530 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2531 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2533 if (TARGET_DIRECT_MOVE_128
)
2534 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2535 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
2539 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2540 legitimate address support to figure out the appropriate addressing to
2544 rs6000_setup_reg_addr_masks (void)
2546 ssize_t rc
, reg
, m
, nregs
;
2547 addr_mask_type any_addr_mask
, addr_mask
;
2549 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2551 machine_mode m2
= (machine_mode
) m
;
2552 bool complex_p
= false;
2553 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2556 if (COMPLEX_MODE_P (m2
))
2559 m2
= GET_MODE_INNER (m2
);
2562 msize
= GET_MODE_SIZE (m2
);
2564 /* SDmode is special in that we want to access it only via REG+REG
2565 addressing on power7 and above, since we want to use the LFIWZX and
2566 STFIWZX instructions to load it. */
2567 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2570 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2573 reg
= reload_reg_map
[rc
].reg
;
2575 /* Can mode values go in the GPR/FPR/Altivec registers? */
2576 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2578 bool small_int_vsx_p
= (small_int_p
2579 && (rc
== RELOAD_REG_FPR
2580 || rc
== RELOAD_REG_VMX
));
2582 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2583 addr_mask
|= RELOAD_REG_VALID
;
2585 /* Indicate if the mode takes more than 1 physical register. If
2586 it takes a single register, indicate it can do REG+REG
2587 addressing. Small integers in VSX registers can only do
2588 REG+REG addressing. */
2589 if (small_int_vsx_p
)
2590 addr_mask
|= RELOAD_REG_INDEXED
;
2591 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2592 addr_mask
|= RELOAD_REG_MULTIPLE
;
2594 addr_mask
|= RELOAD_REG_INDEXED
;
2596 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2597 addressing. If we allow scalars into Altivec registers,
2598 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2600 For VSX systems, we don't allow update addressing for
2601 DFmode/SFmode if those registers can go in both the
2602 traditional floating point registers and Altivec registers.
2603 The load/store instructions for the Altivec registers do not
2604 have update forms. If we allowed update addressing, it seems
2605 to break IV-OPT code using floating point if the index type is
2606 int instead of long (PR target/81550 and target/84042). */
2609 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2611 && !VECTOR_MODE_P (m2
)
2612 && !FLOAT128_VECTOR_P (m2
)
2614 && (m
!= E_DFmode
|| !TARGET_VSX
)
2615 && (m
!= E_SFmode
|| !TARGET_P8_VECTOR
)
2616 && !small_int_vsx_p
)
2618 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2620 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2621 we don't allow PRE_MODIFY for some multi-register
2626 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2630 if (TARGET_POWERPC64
)
2631 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2636 if (TARGET_HARD_FLOAT
)
2637 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2643 /* GPR and FPR registers can do REG+OFFSET addressing, except
2644 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2645 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2646 if ((addr_mask
!= 0) && !indexed_only_p
2648 && (rc
== RELOAD_REG_GPR
2649 || ((msize
== 8 || m2
== SFmode
)
2650 && (rc
== RELOAD_REG_FPR
2651 || (rc
== RELOAD_REG_VMX
&& TARGET_P9_VECTOR
)))))
2652 addr_mask
|= RELOAD_REG_OFFSET
;
2654 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2655 instructions are enabled. The offset for 128-bit VSX registers is
2656 only 12-bits. While GPRs can handle the full offset range, VSX
2657 registers can only handle the restricted range. */
2658 else if ((addr_mask
!= 0) && !indexed_only_p
2659 && msize
== 16 && TARGET_P9_VECTOR
2660 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
2661 || (m2
== TImode
&& TARGET_VSX
)))
2663 addr_mask
|= RELOAD_REG_OFFSET
;
2664 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2665 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2668 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2669 addressing on 128-bit types. */
2670 if (rc
== RELOAD_REG_VMX
&& msize
== 16
2671 && (addr_mask
& RELOAD_REG_VALID
) != 0)
2672 addr_mask
|= RELOAD_REG_AND_M16
;
2674 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
2675 any_addr_mask
|= addr_mask
;
2678 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
2683 /* Initialize the various global tables that are based on register size. */
2685 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
2691 /* Precalculate REGNO_REG_CLASS. */
2692 rs6000_regno_regclass
[0] = GENERAL_REGS
;
2693 for (r
= 1; r
< 32; ++r
)
2694 rs6000_regno_regclass
[r
] = BASE_REGS
;
2696 for (r
= 32; r
< 64; ++r
)
2697 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
2699 for (r
= 64; HARD_REGISTER_NUM_P (r
); ++r
)
2700 rs6000_regno_regclass
[r
] = NO_REGS
;
2702 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
2703 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
2705 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
2706 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
2707 rs6000_regno_regclass
[r
] = CR_REGS
;
2709 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
2710 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
2711 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
2712 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
2713 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
2714 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
2715 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
2717 /* Precalculate register class to simpler reload register class. We don't
2718 need all of the register classes that are combinations of different
2719 classes, just the simple ones that have constraint letters. */
2720 for (c
= 0; c
< N_REG_CLASSES
; c
++)
2721 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
2723 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
2724 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
2725 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
2726 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
2727 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
2728 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
2729 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
2730 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
2731 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
2732 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
2736 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
2737 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
2741 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
2742 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
2745 /* Precalculate the valid memory formats as well as the vector information,
2746 this must be set up before the rs6000_hard_regno_nregs_internal calls
2748 gcc_assert ((int)VECTOR_NONE
== 0);
2749 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
2750 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_mem
));
2752 gcc_assert ((int)CODE_FOR_nothing
== 0);
2753 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
2755 gcc_assert ((int)NO_REGS
== 0);
2756 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
2758 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2759 believes it can use native alignment or still uses 128-bit alignment. */
2760 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
2771 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2772 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2773 if (TARGET_FLOAT128_TYPE
)
2775 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
2776 rs6000_vector_align
[KFmode
] = 128;
2778 if (FLOAT128_IEEE_P (TFmode
))
2780 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
2781 rs6000_vector_align
[TFmode
] = 128;
2785 /* V2DF mode, VSX only. */
2788 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
2789 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
2790 rs6000_vector_align
[V2DFmode
] = align64
;
2793 /* V4SF mode, either VSX or Altivec. */
2796 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
2797 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
2798 rs6000_vector_align
[V4SFmode
] = align32
;
2800 else if (TARGET_ALTIVEC
)
2802 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
2803 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
2804 rs6000_vector_align
[V4SFmode
] = align32
;
2807 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2811 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
2812 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
2813 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
2814 rs6000_vector_align
[V4SImode
] = align32
;
2815 rs6000_vector_align
[V8HImode
] = align32
;
2816 rs6000_vector_align
[V16QImode
] = align32
;
2820 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
2821 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
2822 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
2826 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
2827 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
2828 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
2832 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2833 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2836 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
2837 rs6000_vector_unit
[V2DImode
]
2838 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2839 rs6000_vector_align
[V2DImode
] = align64
;
2841 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
2842 rs6000_vector_unit
[V1TImode
]
2843 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2844 rs6000_vector_align
[V1TImode
] = 128;
2847 /* DFmode, see if we want to use the VSX unit. Memory is handled
2848 differently, so don't set rs6000_vector_mem. */
2851 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
2852 rs6000_vector_align
[DFmode
] = 64;
2855 /* SFmode, see if we want to use the VSX unit. */
2856 if (TARGET_P8_VECTOR
)
2858 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
2859 rs6000_vector_align
[SFmode
] = 32;
2862 /* Allow TImode in VSX register and set the VSX memory macros. */
2865 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
2866 rs6000_vector_align
[TImode
] = align64
;
2869 /* Register class constraints for the constraints that depend on compile
2870 switches. When the VSX code was added, different constraints were added
2871 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2872 of the VSX registers are used. The register classes for scalar floating
2873 point types is set, based on whether we allow that type into the upper
2874 (Altivec) registers. GCC has register classes to target the Altivec
2875 registers for load/store operations, to select using a VSX memory
2876 operation instead of the traditional floating point operation. The
2879 d - Register class to use with traditional DFmode instructions.
2880 f - Register class to use with traditional SFmode instructions.
2881 v - Altivec register.
2882 wa - Any VSX register.
2883 wc - Reserved to represent individual CR bits (used in LLVM).
2884 wn - always NO_REGS.
2885 wr - GPR if 64-bit mode is permitted.
2886 wx - Float register if we can do 32-bit int stores. */
2888 if (TARGET_HARD_FLOAT
)
2890 rs6000_constraints
[RS6000_CONSTRAINT_f
] = FLOAT_REGS
; /* SFmode */
2891 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
; /* DFmode */
2895 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
2897 /* Add conditional constraints based on various options, to allow us to
2898 collapse multiple insn patterns. */
2900 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
2902 if (TARGET_POWERPC64
)
2904 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
2905 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
2909 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
2911 /* Support for new direct moves (ISA 3.0 + 64bit). */
2912 if (TARGET_DIRECT_MOVE_128
)
2913 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
2915 /* Set up the reload helper and direct move functions. */
2916 if (TARGET_VSX
|| TARGET_ALTIVEC
)
2920 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
2921 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
2922 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
2923 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
2924 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
2925 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
2926 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
2927 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
2928 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
2929 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
2930 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
2931 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
2932 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
2933 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
2934 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
2935 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
2936 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
2937 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
2938 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
2939 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
2941 if (FLOAT128_VECTOR_P (KFmode
))
2943 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
2944 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
2947 if (FLOAT128_VECTOR_P (TFmode
))
2949 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
2950 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
2953 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2955 if (TARGET_NO_SDMODE_STACK
)
2957 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
2958 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
2963 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
2964 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
2967 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
2969 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
2970 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
2971 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
2972 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
2973 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
2974 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
2975 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
2976 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
2977 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
2979 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
2980 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
2981 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
2982 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
2983 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
2984 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
2985 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
2986 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
2987 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
2989 if (FLOAT128_VECTOR_P (KFmode
))
2991 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
2992 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
2995 if (FLOAT128_VECTOR_P (TFmode
))
2997 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
2998 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3004 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3005 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3006 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3007 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3008 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3009 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3010 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3011 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3012 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3013 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3014 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3015 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3016 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3017 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3018 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3019 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3020 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3021 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3022 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3023 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3025 if (FLOAT128_VECTOR_P (KFmode
))
3027 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3028 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3031 if (FLOAT128_IEEE_P (TFmode
))
3033 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3034 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
3037 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3039 if (TARGET_NO_SDMODE_STACK
)
3041 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3042 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3047 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3048 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3051 if (TARGET_DIRECT_MOVE
)
3053 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3054 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3055 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3059 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3060 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3062 if (TARGET_P8_VECTOR
)
3064 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3065 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3067 if (TARGET_P9_VECTOR
)
3069 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3070 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3075 /* Precalculate HARD_REGNO_NREGS. */
3076 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3077 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3078 rs6000_hard_regno_nregs
[m
][r
]
3079 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
) m
);
3081 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3082 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3083 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3084 rs6000_hard_regno_mode_ok_p
[m
][r
]
3085 = rs6000_hard_regno_mode_ok_uncached (r
, (machine_mode
) m
);
3087 /* Precalculate CLASS_MAX_NREGS sizes. */
3088 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3092 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3093 reg_size
= UNITS_PER_VSX_WORD
;
3095 else if (c
== ALTIVEC_REGS
)
3096 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3098 else if (c
== FLOAT_REGS
)
3099 reg_size
= UNITS_PER_FP_WORD
;
3102 reg_size
= UNITS_PER_WORD
;
3104 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3106 machine_mode m2
= (machine_mode
)m
;
3107 int reg_size2
= reg_size
;
3109 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3111 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3112 reg_size2
= UNITS_PER_FP_WORD
;
3114 rs6000_class_max_nregs
[m
][c
]
3115 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
3119 /* Calculate which modes to automatically generate code to use a the
3120 reciprocal divide and square root instructions. In the future, possibly
3121 automatically generate the instructions even if the user did not specify
3122 -mrecip. The older machines double precision reciprocal sqrt estimate is
3123 not accurate enough. */
3124 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3126 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3128 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3129 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3130 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3131 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3132 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3134 if (TARGET_FRSQRTES
)
3135 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3137 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3138 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3139 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3140 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3141 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3143 if (rs6000_recip_control
)
3145 if (!flag_finite_math_only
)
3146 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3148 if (flag_trapping_math
)
3149 warning (0, "%qs requires %qs or %qs", "-mrecip",
3150 "-fno-trapping-math", "-ffast-math");
3151 if (!flag_reciprocal_math
)
3152 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3154 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3156 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3157 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3158 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3160 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3161 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3162 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3164 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3165 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3166 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3168 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3169 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3170 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3172 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3173 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3174 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3176 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3177 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3178 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3180 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3181 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3182 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3184 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3185 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3186 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3190 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3191 legitimate address support to figure out the appropriate addressing to
3193 rs6000_setup_reg_addr_masks ();
3195 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3197 if (TARGET_DEBUG_REG
)
3198 rs6000_debug_reg_global ();
3200 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3202 "SImode variable mult cost = %d\n"
3203 "SImode constant mult cost = %d\n"
3204 "SImode short constant mult cost = %d\n"
3205 "DImode multipliciation cost = %d\n"
3206 "SImode division cost = %d\n"
3207 "DImode division cost = %d\n"
3208 "Simple fp operation cost = %d\n"
3209 "DFmode multiplication cost = %d\n"
3210 "SFmode division cost = %d\n"
3211 "DFmode division cost = %d\n"
3212 "cache line size = %d\n"
3213 "l1 cache size = %d\n"
3214 "l2 cache size = %d\n"
3215 "simultaneous prefetches = %d\n"
3218 rs6000_cost
->mulsi_const
,
3219 rs6000_cost
->mulsi_const9
,
3227 rs6000_cost
->cache_line_size
,
3228 rs6000_cost
->l1_cache_size
,
3229 rs6000_cost
->l2_cache_size
,
3230 rs6000_cost
->simultaneous_prefetches
);
3235 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3238 darwin_rs6000_override_options (void)
3240 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3242 rs6000_altivec_abi
= 1;
3243 TARGET_ALTIVEC_VRSAVE
= 1;
3244 rs6000_current_abi
= ABI_DARWIN
;
3246 if (DEFAULT_ABI
== ABI_DARWIN
3248 darwin_one_byte_bool
= 1;
3250 if (TARGET_64BIT
&& ! TARGET_POWERPC64
)
3252 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3253 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3256 /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
3257 optimisation, and will not work with the most generic case (where the
3258 symbol is undefined external, but there is no symbl stub). */
3260 rs6000_default_long_calls
= 0;
3262 /* ld_classic is (so far) still used for kernel (static) code, and supports
3263 the JBSR longcall / branch islands. */
3266 rs6000_default_long_calls
= 1;
3268 /* Allow a kext author to do -mkernel -mhard-float. */
3269 if (! (rs6000_isa_flags_explicit
& OPTION_MASK_SOFT_FLOAT
))
3270 rs6000_isa_flags
|= OPTION_MASK_SOFT_FLOAT
;
3273 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3275 if (!flag_mkernel
&& !flag_apple_kext
3277 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
))
3278 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3280 /* Unless the user (not the configurer) has explicitly overridden
3281 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3282 G4 unless targeting the kernel. */
3285 && strverscmp (darwin_macosx_version_min
, "10.5") >= 0
3286 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
)
3287 && ! global_options_set
.x_rs6000_cpu_index
)
3289 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3294 /* If not otherwise specified by a target, make 'long double' equivalent to
3297 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3298 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3301 /* Return the builtin mask of the various options used that could affect which
3302 builtins were used. In the past we used target_flags, but we've run out of
3303 bits, and some options are no longer in target_flags. */
3306 rs6000_builtin_mask_calculate (void)
3308 return (((TARGET_ALTIVEC
) ? RS6000_BTM_ALTIVEC
: 0)
3309 | ((TARGET_CMPB
) ? RS6000_BTM_CMPB
: 0)
3310 | ((TARGET_VSX
) ? RS6000_BTM_VSX
: 0)
3311 | ((TARGET_FRE
) ? RS6000_BTM_FRE
: 0)
3312 | ((TARGET_FRES
) ? RS6000_BTM_FRES
: 0)
3313 | ((TARGET_FRSQRTE
) ? RS6000_BTM_FRSQRTE
: 0)
3314 | ((TARGET_FRSQRTES
) ? RS6000_BTM_FRSQRTES
: 0)
3315 | ((TARGET_POPCNTD
) ? RS6000_BTM_POPCNTD
: 0)
3316 | ((rs6000_cpu
== PROCESSOR_CELL
) ? RS6000_BTM_CELL
: 0)
3317 | ((TARGET_P8_VECTOR
) ? RS6000_BTM_P8_VECTOR
: 0)
3318 | ((TARGET_P9_VECTOR
) ? RS6000_BTM_P9_VECTOR
: 0)
3319 | ((TARGET_P9_MISC
) ? RS6000_BTM_P9_MISC
: 0)
3320 | ((TARGET_MODULO
) ? RS6000_BTM_MODULO
: 0)
3321 | ((TARGET_64BIT
) ? RS6000_BTM_64BIT
: 0)
3322 | ((TARGET_POWERPC64
) ? RS6000_BTM_POWERPC64
: 0)
3323 | ((TARGET_CRYPTO
) ? RS6000_BTM_CRYPTO
: 0)
3324 | ((TARGET_HTM
) ? RS6000_BTM_HTM
: 0)
3325 | ((TARGET_DFP
) ? RS6000_BTM_DFP
: 0)
3326 | ((TARGET_HARD_FLOAT
) ? RS6000_BTM_HARD_FLOAT
: 0)
3327 | ((TARGET_LONG_DOUBLE_128
3328 && TARGET_HARD_FLOAT
3329 && !TARGET_IEEEQUAD
) ? RS6000_BTM_LDBL128
: 0)
3330 | ((TARGET_FLOAT128_TYPE
) ? RS6000_BTM_FLOAT128
: 0)
3331 | ((TARGET_FLOAT128_HW
) ? RS6000_BTM_FLOAT128_HW
: 0));
3334 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3335 to clobber the XER[CA] bit because clobbering that bit without telling
3336 the compiler worked just fine with versions of GCC before GCC 5, and
3337 breaking a lot of older code in ways that are hard to track down is
3338 not such a great idea. */
3341 rs6000_md_asm_adjust (vec
<rtx
> &/*outputs*/, vec
<rtx
> &/*inputs*/,
3342 vec
<const char *> &/*constraints*/,
3343 vec
<rtx
> &clobbers
, HARD_REG_SET
&clobbered_regs
)
3345 clobbers
.safe_push (gen_rtx_REG (SImode
, CA_REGNO
));
3346 SET_HARD_REG_BIT (clobbered_regs
, CA_REGNO
);
3350 /* Override command line options.
3352 Combine build-specific configuration information with options
3353 specified on the command line to set various state variables which
3354 influence code generation, optimization, and expansion of built-in
3355 functions. Assure that command-line configuration preferences are
3356 compatible with each other and with the build configuration; issue
3357 warnings while adjusting configuration or error messages while
3358 rejecting configuration.
3360 Upon entry to this function:
3362 This function is called once at the beginning of
3363 compilation, and then again at the start and end of compiling
3364 each section of code that has a different configuration, as
3365 indicated, for example, by adding the
3367 __attribute__((__target__("cpu=power9")))
3369 qualifier to a function definition or, for example, by bracketing
3372 #pragma GCC target("altivec")
3376 #pragma GCC reset_options
3378 directives. Parameter global_init_p is true for the initial
3379 invocation, which initializes global variables, and false for all
3380 subsequent invocations.
3383 Various global state information is assumed to be valid. This
3384 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3385 default CPU specified at build configure time, TARGET_DEFAULT,
3386 representing the default set of option flags for the default
3387 target, and global_options_set.x_rs6000_isa_flags, representing
3388 which options were requested on the command line.
3390 Upon return from this function:
3392 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3393 was set by name on the command line. Additionally, if certain
3394 attributes are automatically enabled or disabled by this function
3395 in order to assure compatibility between options and
3396 configuration, the flags associated with those attributes are
3397 also set. By setting these "explicit bits", we avoid the risk
3398 that other code might accidentally overwrite these particular
3399 attributes with "default values".
3401 The various bits of rs6000_isa_flags are set to indicate the
3402 target options that have been selected for the most current
3403 compilation efforts. This has the effect of also turning on the
3404 associated TARGET_XXX values since these are macros which are
3405 generally defined to test the corresponding bit of the
3406 rs6000_isa_flags variable.
3408 The variable rs6000_builtin_mask is set to represent the target
3409 options for the most current compilation efforts, consistent with
3410 the current contents of rs6000_isa_flags. This variable controls
3411 expansion of built-in functions.
3413 Various other global variables and fields of global structures
3414 (over 50 in all) are initialized to reflect the desired options
3415 for the most current compilation efforts. */
3418 rs6000_option_override_internal (bool global_init_p
)
3422 HOST_WIDE_INT set_masks
;
3423 HOST_WIDE_INT ignore_masks
;
3426 struct cl_target_option
*main_target_opt
3427 = ((global_init_p
|| target_option_default_node
== NULL
)
3428 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
3430 /* Print defaults. */
3431 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
3432 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
3434 /* Remember the explicit arguments. */
3436 rs6000_isa_flags_explicit
= global_options_set
.x_rs6000_isa_flags
;
3438 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3439 library functions, so warn about it. The flag may be useful for
3440 performance studies from time to time though, so don't disable it
3442 if (global_options_set
.x_rs6000_alignment_flags
3443 && rs6000_alignment_flags
== MASK_ALIGN_POWER
3444 && DEFAULT_ABI
== ABI_DARWIN
3446 warning (0, "%qs is not supported for 64-bit Darwin;"
3447 " it is incompatible with the installed C and C++ libraries",
3450 /* Numerous experiment shows that IRA based loop pressure
3451 calculation works better for RTL loop invariant motion on targets
3452 with enough (>= 32) registers. It is an expensive optimization.
3453 So it is on only for peak performance. */
3454 if (optimize
>= 3 && global_init_p
3455 && !global_options_set
.x_flag_ira_loop_pressure
)
3456 flag_ira_loop_pressure
= 1;
3458 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3459 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3460 options were already specified. */
3461 if (flag_sanitize
& SANITIZE_USER_ADDRESS
3462 && !global_options_set
.x_flag_asynchronous_unwind_tables
)
3463 flag_asynchronous_unwind_tables
= 1;
3465 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3466 loop unroller is active. It is only checked during unrolling, so
3467 we can just set it on by default. */
3468 if (!global_options_set
.x_flag_variable_expansion_in_unroller
)
3469 flag_variable_expansion_in_unroller
= 1;
3471 /* Set the pointer size. */
3474 rs6000_pmode
= DImode
;
3475 rs6000_pointer_size
= 64;
3479 rs6000_pmode
= SImode
;
3480 rs6000_pointer_size
= 32;
3483 /* Some OSs don't support saving the high part of 64-bit registers on context
3484 switch. Other OSs don't support saving Altivec registers. On those OSs,
3485 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3486 if the user wants either, the user must explicitly specify them and we
3487 won't interfere with the user's specification. */
3489 set_masks
= POWERPC_MASKS
;
3490 #ifdef OS_MISSING_POWERPC64
3491 if (OS_MISSING_POWERPC64
)
3492 set_masks
&= ~OPTION_MASK_POWERPC64
;
3494 #ifdef OS_MISSING_ALTIVEC
3495 if (OS_MISSING_ALTIVEC
)
3496 set_masks
&= ~(OPTION_MASK_ALTIVEC
| OPTION_MASK_VSX
3497 | OTHER_VSX_VECTOR_MASKS
);
3500 /* Don't override by the processor default if given explicitly. */
3501 set_masks
&= ~rs6000_isa_flags_explicit
;
3503 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3504 the cpu in a target attribute or pragma, but did not specify a tuning
3505 option, use the cpu for the tuning option rather than the option specified
3506 with -mtune on the command line. Process a '--with-cpu' configuration
3507 request as an implicit --cpu. */
3508 if (rs6000_cpu_index
>= 0)
3509 cpu_index
= rs6000_cpu_index
;
3510 else if (main_target_opt
!= NULL
&& main_target_opt
->x_rs6000_cpu_index
>= 0)
3511 cpu_index
= main_target_opt
->x_rs6000_cpu_index
;
3512 else if (OPTION_TARGET_CPU_DEFAULT
)
3513 cpu_index
= rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT
);
3515 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3516 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3517 with those from the cpu, except for options that were explicitly set. If
3518 we don't have a cpu, do not override the target bits set in
3522 rs6000_cpu_index
= cpu_index
;
3523 rs6000_isa_flags
&= ~set_masks
;
3524 rs6000_isa_flags
|= (processor_target_table
[cpu_index
].target_enable
3529 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3530 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3531 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3532 to using rs6000_isa_flags, we need to do the initialization here.
3534 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3535 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3536 HOST_WIDE_INT flags
;
3538 flags
= TARGET_DEFAULT
;
3541 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3542 const char *default_cpu
= (!TARGET_POWERPC64
3547 int default_cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
3548 flags
= processor_target_table
[default_cpu_index
].target_enable
;
3550 rs6000_isa_flags
|= (flags
& ~rs6000_isa_flags_explicit
);
3553 if (rs6000_tune_index
>= 0)
3554 tune_index
= rs6000_tune_index
;
3555 else if (cpu_index
>= 0)
3556 rs6000_tune_index
= tune_index
= cpu_index
;
3560 enum processor_type tune_proc
3561 = (TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
);
3564 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
3565 if (processor_target_table
[i
].processor
== tune_proc
)
3573 rs6000_cpu
= processor_target_table
[cpu_index
].processor
;
3575 rs6000_cpu
= TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
;
3577 gcc_assert (tune_index
>= 0);
3578 rs6000_tune
= processor_target_table
[tune_index
].processor
;
3580 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
3581 || rs6000_cpu
== PROCESSOR_PPCE500MC
|| rs6000_cpu
== PROCESSOR_PPCE500MC64
3582 || rs6000_cpu
== PROCESSOR_PPCE5500
)
3585 error ("AltiVec not supported in this target");
3588 /* If we are optimizing big endian systems for space, use the load/store
3589 multiple instructions. */
3590 if (BYTES_BIG_ENDIAN
&& optimize_size
)
3591 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
;
3593 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3594 because the hardware doesn't support the instructions used in little
3595 endian mode, and causes an alignment trap. The 750 does not cause an
3596 alignment trap (except when the target is unaligned). */
3598 if (!BYTES_BIG_ENDIAN
&& rs6000_cpu
!= PROCESSOR_PPC750
&& TARGET_MULTIPLE
)
3600 rs6000_isa_flags
&= ~OPTION_MASK_MULTIPLE
;
3601 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
) != 0)
3602 warning (0, "%qs is not supported on little endian systems",
3606 /* If little-endian, default to -mstrict-align on older processors.
3607 Testing for htm matches power8 and later. */
3608 if (!BYTES_BIG_ENDIAN
3609 && !(processor_target_table
[tune_index
].target_enable
& OPTION_MASK_HTM
))
3610 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_STRICT_ALIGN
;
3612 if (!rs6000_fold_gimple
)
3614 "gimple folding of rs6000 builtins has been disabled.\n");
3616 /* Add some warnings for VSX. */
3619 const char *msg
= NULL
;
3620 if (!TARGET_HARD_FLOAT
)
3622 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3623 msg
= N_("%<-mvsx%> requires hardware floating point");
3626 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3627 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3630 else if (TARGET_AVOID_XFORM
> 0)
3631 msg
= N_("%<-mvsx%> needs indexed addressing");
3632 else if (!TARGET_ALTIVEC
&& (rs6000_isa_flags_explicit
3633 & OPTION_MASK_ALTIVEC
))
3635 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3636 msg
= N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3638 msg
= N_("%<-mno-altivec%> disables vsx");
3644 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3645 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3649 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3650 the -mcpu setting to enable options that conflict. */
3651 if ((!TARGET_HARD_FLOAT
|| !TARGET_ALTIVEC
|| !TARGET_VSX
)
3652 && (rs6000_isa_flags_explicit
& (OPTION_MASK_SOFT_FLOAT
3653 | OPTION_MASK_ALTIVEC
3654 | OPTION_MASK_VSX
)) != 0)
3655 rs6000_isa_flags
&= ~((OPTION_MASK_P8_VECTOR
| OPTION_MASK_CRYPTO
3656 | OPTION_MASK_DIRECT_MOVE
)
3657 & ~rs6000_isa_flags_explicit
);
3659 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
3660 rs6000_print_isa_options (stderr
, 0, "before defaults", rs6000_isa_flags
);
3662 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3663 off all of the options that depend on those flags. */
3664 ignore_masks
= rs6000_disable_incompatible_switches ();
3666 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3667 unless the user explicitly used the -mno-<option> to disable the code. */
3668 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_MISC
)
3669 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3670 else if (TARGET_P9_MINMAX
)
3674 if (cpu_index
== PROCESSOR_POWER9
)
3676 /* legacy behavior: allow -mcpu=power9 with certain
3677 capabilities explicitly disabled. */
3678 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3681 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3682 "for <xxx> less than power9", "-mcpu");
3684 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
3685 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
3686 & rs6000_isa_flags_explicit
))
3687 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3688 were explicitly cleared. */
3689 error ("%qs incompatible with explicitly disabled options",
3692 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
3694 else if (TARGET_P8_VECTOR
|| TARGET_DIRECT_MOVE
|| TARGET_CRYPTO
)
3695 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
3696 else if (TARGET_VSX
)
3697 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
3698 else if (TARGET_POPCNTD
)
3699 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
3700 else if (TARGET_DFP
)
3701 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
3702 else if (TARGET_CMPB
)
3703 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
3704 else if (TARGET_FPRND
)
3705 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
3706 else if (TARGET_POPCNTB
)
3707 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
3708 else if (TARGET_ALTIVEC
)
3709 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
3711 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
3713 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
3714 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3715 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
3718 if (TARGET_DIRECT_MOVE
&& !TARGET_VSX
)
3720 if (rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
3721 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3722 rs6000_isa_flags
&= ~OPTION_MASK_DIRECT_MOVE
;
3725 if (TARGET_P8_VECTOR
&& !TARGET_ALTIVEC
)
3727 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3728 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3729 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3732 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
3734 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3735 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
))
3736 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3737 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
) == 0)
3739 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3740 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3741 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
3745 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3747 rs6000_isa_flags
|= OPTION_MASK_VSX
;
3748 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3752 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
3754 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
3755 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3756 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
3759 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
3760 silently turn off quad memory mode. */
3761 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
3763 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3764 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3766 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
3767 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3769 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
3770 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
3773 /* Non-atomic quad memory load/store are disabled for little endian, since
3774 the words are reversed, but atomic operations can still be done by
3775 swapping the words. */
3776 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
3778 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3779 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3782 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
3785 /* Assume if the user asked for normal quad memory instructions, they want
3786 the atomic versions as well, unless they explicity told us not to use quad
3787 word atomic instructions. */
3788 if (TARGET_QUAD_MEMORY
3789 && !TARGET_QUAD_MEMORY_ATOMIC
3790 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
3791 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
3793 /* If we can shrink-wrap the TOC register save separately, then use
3794 -msave-toc-indirect unless explicitly disabled. */
3795 if ((rs6000_isa_flags_explicit
& OPTION_MASK_SAVE_TOC_INDIRECT
) == 0
3796 && flag_shrink_wrap_separate
3797 && optimize_function_for_speed_p (cfun
))
3798 rs6000_isa_flags
|= OPTION_MASK_SAVE_TOC_INDIRECT
;
3800 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3801 generating power8 instructions. Power9 does not optimize power8 fusion
3803 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
3805 if (processor_target_table
[tune_index
].processor
== PROCESSOR_POWER8
)
3806 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
3808 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
3811 /* Setting additional fusion flags turns on base fusion. */
3812 if (!TARGET_P8_FUSION
&& TARGET_P8_FUSION_SIGN
)
3814 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
3816 if (TARGET_P8_FUSION_SIGN
)
3817 error ("%qs requires %qs", "-mpower8-fusion-sign",
3820 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
3823 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
3826 /* Power8 does not fuse sign extended loads with the addis. If we are
3827 optimizing at high levels for speed, convert a sign extended load into a
3828 zero extending load, and an explicit sign extension. */
3829 if (TARGET_P8_FUSION
3830 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
3831 && optimize_function_for_speed_p (cfun
)
3833 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
3835 /* ISA 3.0 vector instructions include ISA 2.07. */
3836 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
3838 /* We prefer to not mention undocumented options in
3839 error messages. However, if users have managed to select
3840 power9-vector without selecting power8-vector, they
3841 already know about undocumented flags. */
3842 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) &&
3843 (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
))
3844 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3845 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) == 0)
3847 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
3848 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3849 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
3853 /* OPTION_MASK_P9_VECTOR is explicit and
3854 OPTION_MASK_P8_VECTOR is not explicit. */
3855 rs6000_isa_flags
|= OPTION_MASK_P8_VECTOR
;
3856 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
3860 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
3861 support. If we only have ISA 2.06 support, and the user did not specify
3862 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3863 but we don't enable the full vectorization support */
3864 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
3865 TARGET_ALLOW_MOVMISALIGN
= 1;
3867 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
3869 if (TARGET_ALLOW_MOVMISALIGN
> 0
3870 && global_options_set
.x_TARGET_ALLOW_MOVMISALIGN
)
3871 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3873 TARGET_ALLOW_MOVMISALIGN
= 0;
3876 /* Determine when unaligned vector accesses are permitted, and when
3877 they are preferred over masked Altivec loads. Note that if
3878 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3879 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3881 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
3885 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
3886 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3888 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
3891 else if (!TARGET_ALLOW_MOVMISALIGN
)
3893 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
3894 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
3895 "-mallow-movmisalign");
3897 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
3901 /* Use long double size to select the appropriate long double. We use
3902 TYPE_PRECISION to differentiate the 3 different long double types. We map
3903 128 into the precision used for TFmode. */
3904 int default_long_double_size
= (RS6000_DEFAULT_LONG_DOUBLE_SIZE
== 64
3906 : FLOAT_PRECISION_TFmode
);
3908 /* Set long double size before the IEEE 128-bit tests. */
3909 if (!global_options_set
.x_rs6000_long_double_type_size
)
3911 if (main_target_opt
!= NULL
3912 && (main_target_opt
->x_rs6000_long_double_type_size
3913 != default_long_double_size
))
3914 error ("target attribute or pragma changes %<long double%> size");
3916 rs6000_long_double_type_size
= default_long_double_size
;
3918 else if (rs6000_long_double_type_size
== 128)
3919 rs6000_long_double_type_size
= FLOAT_PRECISION_TFmode
;
3920 else if (global_options_set
.x_rs6000_ieeequad
)
3922 if (global_options
.x_rs6000_ieeequad
)
3923 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3925 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3928 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3929 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3930 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3931 those systems will not pick up this default. Warn if the user changes the
3932 default unless -Wno-psabi. */
3933 if (!global_options_set
.x_rs6000_ieeequad
)
3934 rs6000_ieeequad
= TARGET_IEEEQUAD_DEFAULT
;
3938 if (global_options
.x_rs6000_ieeequad
3939 && (!TARGET_POPCNTD
|| !TARGET_VSX
))
3940 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3942 if (rs6000_ieeequad
!= TARGET_IEEEQUAD_DEFAULT
&& TARGET_LONG_DOUBLE_128
)
3944 static bool warned_change_long_double
;
3945 if (!warned_change_long_double
)
3947 warned_change_long_double
= true;
3948 if (TARGET_IEEEQUAD
)
3949 warning (OPT_Wpsabi
, "Using IEEE extended precision "
3952 warning (OPT_Wpsabi
, "Using IBM extended precision "
3958 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3959 sytems. In GCC 7, we would enable the the IEEE 128-bit floating point
3960 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3961 unless the user used the explicit -mfloat128. In GCC 8, we enable both
3962 the keyword as well as the type. */
3963 TARGET_FLOAT128_TYPE
= TARGET_FLOAT128_ENABLE_TYPE
&& TARGET_VSX
;
3965 /* IEEE 128-bit floating point requires VSX support. */
3966 if (TARGET_FLOAT128_KEYWORD
)
3970 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
3971 error ("%qs requires VSX support", "%<-mfloat128%>");
3973 TARGET_FLOAT128_TYPE
= 0;
3974 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_KEYWORD
3975 | OPTION_MASK_FLOAT128_HW
);
3977 else if (!TARGET_FLOAT128_TYPE
)
3979 TARGET_FLOAT128_TYPE
= 1;
3980 warning (0, "The %<-mfloat128%> option may not be fully supported");
3984 /* Enable the __float128 keyword under Linux by default. */
3985 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_KEYWORD
3986 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
3987 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
3989 /* If we have are supporting the float128 type and full ISA 3.0 support,
3990 enable -mfloat128-hardware by default. However, don't enable the
3991 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
3992 because sometimes the compiler wants to put things in an integer
3993 container, and if we don't have __int128 support, it is impossible. */
3994 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
&& TARGET_64BIT
3995 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
3996 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
3997 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
3999 if (TARGET_FLOAT128_HW
4000 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4002 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4003 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4005 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4008 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4010 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4011 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4013 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4016 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4017 if (TARGET_PREFIXED_ADDR
&& !TARGET_FUTURE
)
4019 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4020 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4021 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED_ADDR
) != 0)
4022 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4024 rs6000_isa_flags
&= ~(OPTION_MASK_PCREL
| OPTION_MASK_PREFIXED_ADDR
);
4027 /* -mpcrel requires prefixed load/store addressing. */
4028 if (TARGET_PCREL
&& !TARGET_PREFIXED_ADDR
)
4030 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4031 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4033 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4036 /* Print the options after updating the defaults. */
4037 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4038 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
4040 /* E500mc does "better" if we inline more aggressively. Respect the
4041 user's opinion, though. */
4042 if (rs6000_block_move_inline_limit
== 0
4043 && (rs6000_tune
== PROCESSOR_PPCE500MC
4044 || rs6000_tune
== PROCESSOR_PPCE500MC64
4045 || rs6000_tune
== PROCESSOR_PPCE5500
4046 || rs6000_tune
== PROCESSOR_PPCE6500
))
4047 rs6000_block_move_inline_limit
= 128;
4049 /* store_one_arg depends on expand_block_move to handle at least the
4050 size of reg_parm_stack_space. */
4051 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
4052 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
4056 /* If the appropriate debug option is enabled, replace the target hooks
4057 with debug versions that call the real version and then prints
4058 debugging information. */
4059 if (TARGET_DEBUG_COST
)
4061 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
4062 targetm
.address_cost
= rs6000_debug_address_cost
;
4063 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
4066 if (TARGET_DEBUG_ADDR
)
4068 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
4069 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
4070 rs6000_secondary_reload_class_ptr
4071 = rs6000_debug_secondary_reload_class
;
4072 targetm
.secondary_memory_needed
4073 = rs6000_debug_secondary_memory_needed
;
4074 targetm
.can_change_mode_class
4075 = rs6000_debug_can_change_mode_class
;
4076 rs6000_preferred_reload_class_ptr
4077 = rs6000_debug_preferred_reload_class
;
4078 rs6000_mode_dependent_address_ptr
4079 = rs6000_debug_mode_dependent_address
;
4082 if (rs6000_veclibabi_name
)
4084 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
4085 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
4088 error ("unknown vectorization library ABI type (%qs) for "
4089 "%qs switch", rs6000_veclibabi_name
, "-mveclibabi=");
4095 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4096 target attribute or pragma which automatically enables both options,
4097 unless the altivec ABI was set. This is set by default for 64-bit, but
4099 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4101 TARGET_FLOAT128_TYPE
= 0;
4102 rs6000_isa_flags
&= ~((OPTION_MASK_VSX
| OPTION_MASK_ALTIVEC
4103 | OPTION_MASK_FLOAT128_KEYWORD
)
4104 & ~rs6000_isa_flags_explicit
);
4107 /* Enable Altivec ABI for AIX -maltivec. */
4108 if (TARGET_XCOFF
&& (TARGET_ALTIVEC
|| TARGET_VSX
))
4110 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4111 error ("target attribute or pragma changes AltiVec ABI");
4113 rs6000_altivec_abi
= 1;
4116 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4117 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4118 be explicitly overridden in either case. */
4121 if (!global_options_set
.x_rs6000_altivec_abi
4122 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
4124 if (main_target_opt
!= NULL
&&
4125 !main_target_opt
->x_rs6000_altivec_abi
)
4126 error ("target attribute or pragma changes AltiVec ABI");
4128 rs6000_altivec_abi
= 1;
4132 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4133 So far, the only darwin64 targets are also MACH-O. */
4135 && DEFAULT_ABI
== ABI_DARWIN
4138 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
4139 error ("target attribute or pragma changes darwin64 ABI");
4142 rs6000_darwin64_abi
= 1;
4143 /* Default to natural alignment, for better performance. */
4144 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
4148 /* Place FP constants in the constant pool instead of TOC
4149 if section anchors enabled. */
4150 if (flag_section_anchors
4151 && !global_options_set
.x_TARGET_NO_FP_IN_TOC
)
4152 TARGET_NO_FP_IN_TOC
= 1;
4154 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4155 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
4157 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4158 SUBTARGET_OVERRIDE_OPTIONS
;
4160 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4161 SUBSUBTARGET_OVERRIDE_OPTIONS
;
4163 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4164 SUB3TARGET_OVERRIDE_OPTIONS
;
4167 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4168 after the subtarget override options are done. */
4169 if (TARGET_PCREL
&& TARGET_CMODEL
!= CMODEL_MEDIUM
)
4171 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4172 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4174 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4177 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4178 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
4180 rs6000_always_hint
= (rs6000_tune
!= PROCESSOR_POWER4
4181 && rs6000_tune
!= PROCESSOR_POWER5
4182 && rs6000_tune
!= PROCESSOR_POWER6
4183 && rs6000_tune
!= PROCESSOR_POWER7
4184 && rs6000_tune
!= PROCESSOR_POWER8
4185 && rs6000_tune
!= PROCESSOR_POWER9
4186 && rs6000_tune
!= PROCESSOR_FUTURE
4187 && rs6000_tune
!= PROCESSOR_PPCA2
4188 && rs6000_tune
!= PROCESSOR_CELL
4189 && rs6000_tune
!= PROCESSOR_PPC476
);
4190 rs6000_sched_groups
= (rs6000_tune
== PROCESSOR_POWER4
4191 || rs6000_tune
== PROCESSOR_POWER5
4192 || rs6000_tune
== PROCESSOR_POWER7
4193 || rs6000_tune
== PROCESSOR_POWER8
);
4194 rs6000_align_branch_targets
= (rs6000_tune
== PROCESSOR_POWER4
4195 || rs6000_tune
== PROCESSOR_POWER5
4196 || rs6000_tune
== PROCESSOR_POWER6
4197 || rs6000_tune
== PROCESSOR_POWER7
4198 || rs6000_tune
== PROCESSOR_POWER8
4199 || rs6000_tune
== PROCESSOR_POWER9
4200 || rs6000_tune
== PROCESSOR_FUTURE
4201 || rs6000_tune
== PROCESSOR_PPCE500MC
4202 || rs6000_tune
== PROCESSOR_PPCE500MC64
4203 || rs6000_tune
== PROCESSOR_PPCE5500
4204 || rs6000_tune
== PROCESSOR_PPCE6500
);
4206 /* Allow debug switches to override the above settings. These are set to -1
4207 in rs6000.opt to indicate the user hasn't directly set the switch. */
4208 if (TARGET_ALWAYS_HINT
>= 0)
4209 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
4211 if (TARGET_SCHED_GROUPS
>= 0)
4212 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
4214 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
4215 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
4217 rs6000_sched_restricted_insns_priority
4218 = (rs6000_sched_groups
? 1 : 0);
4220 /* Handle -msched-costly-dep option. */
4221 rs6000_sched_costly_dep
4222 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
4224 if (rs6000_sched_costly_dep_str
)
4226 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
4227 rs6000_sched_costly_dep
= no_dep_costly
;
4228 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
4229 rs6000_sched_costly_dep
= all_deps_costly
;
4230 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
4231 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
4232 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
4233 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
4235 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
4236 atoi (rs6000_sched_costly_dep_str
));
4239 /* Handle -minsert-sched-nops option. */
4240 rs6000_sched_insert_nops
4241 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
4243 if (rs6000_sched_insert_nops_str
)
4245 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
4246 rs6000_sched_insert_nops
= sched_finish_none
;
4247 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
4248 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
4249 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
4250 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
4252 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
4253 atoi (rs6000_sched_insert_nops_str
));
4256 /* Handle stack protector */
4257 if (!global_options_set
.x_rs6000_stack_protector_guard
)
4258 #ifdef TARGET_THREAD_SSP_OFFSET
4259 rs6000_stack_protector_guard
= SSP_TLS
;
4261 rs6000_stack_protector_guard
= SSP_GLOBAL
;
4264 #ifdef TARGET_THREAD_SSP_OFFSET
4265 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
4266 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
4269 if (global_options_set
.x_rs6000_stack_protector_guard_offset_str
)
4272 const char *str
= rs6000_stack_protector_guard_offset_str
;
4275 long offset
= strtol (str
, &endp
, 0);
4276 if (!*str
|| *endp
|| errno
)
4277 error ("%qs is not a valid number in %qs", str
,
4278 "-mstack-protector-guard-offset=");
4280 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
4281 || (TARGET_64BIT
&& (offset
& 3)))
4282 error ("%qs is not a valid offset in %qs", str
,
4283 "-mstack-protector-guard-offset=");
4285 rs6000_stack_protector_guard_offset
= offset
;
4288 if (global_options_set
.x_rs6000_stack_protector_guard_reg_str
)
4290 const char *str
= rs6000_stack_protector_guard_reg_str
;
4291 int reg
= decode_reg_name (str
);
4293 if (!IN_RANGE (reg
, 1, 31))
4294 error ("%qs is not a valid base register in %qs", str
,
4295 "-mstack-protector-guard-reg=");
4297 rs6000_stack_protector_guard_reg
= reg
;
4300 if (rs6000_stack_protector_guard
== SSP_TLS
4301 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
4302 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4306 #ifdef TARGET_REGNAMES
4307 /* If the user desires alternate register names, copy in the
4308 alternate names now. */
4309 if (TARGET_REGNAMES
)
4310 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
4313 /* Set aix_struct_return last, after the ABI is determined.
4314 If -maix-struct-return or -msvr4-struct-return was explicitly
4315 used, don't override with the ABI default. */
4316 if (!global_options_set
.x_aix_struct_return
)
4317 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
4320 /* IBM XL compiler defaults to unsigned bitfields. */
4321 if (TARGET_XL_COMPAT
)
4322 flag_signed_bitfields
= 0;
4325 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
4326 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
4328 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
4330 /* We can only guarantee the availability of DI pseudo-ops when
4331 assembling for 64-bit targets. */
4334 targetm
.asm_out
.aligned_op
.di
= NULL
;
4335 targetm
.asm_out
.unaligned_op
.di
= NULL
;
4339 /* Set branch target alignment, if not optimizing for size. */
4342 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4343 aligned 8byte to avoid misprediction by the branch predictor. */
4344 if (rs6000_tune
== PROCESSOR_TITAN
4345 || rs6000_tune
== PROCESSOR_CELL
)
4347 if (flag_align_functions
&& !str_align_functions
)
4348 str_align_functions
= "8";
4349 if (flag_align_jumps
&& !str_align_jumps
)
4350 str_align_jumps
= "8";
4351 if (flag_align_loops
&& !str_align_loops
)
4352 str_align_loops
= "8";
4354 if (rs6000_align_branch_targets
)
4356 if (flag_align_functions
&& !str_align_functions
)
4357 str_align_functions
= "16";
4358 if (flag_align_jumps
&& !str_align_jumps
)
4359 str_align_jumps
= "16";
4360 if (flag_align_loops
&& !str_align_loops
)
4362 can_override_loop_align
= 1;
4363 str_align_loops
= "16";
4367 if (flag_align_jumps
&& !str_align_jumps
)
4368 str_align_jumps
= "16";
4369 if (flag_align_loops
&& !str_align_loops
)
4370 str_align_loops
= "16";
4373 /* Arrange to save and restore machine status around nested functions. */
4374 init_machine_status
= rs6000_init_machine_status
;
4376 /* We should always be splitting complex arguments, but we can't break
4377 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4378 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
4379 targetm
.calls
.split_complex_arg
= NULL
;
4381 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4382 if (DEFAULT_ABI
== ABI_AIX
)
4383 targetm
.calls
.custom_function_descriptors
= 0;
4386 /* Initialize rs6000_cost with the appropriate target costs. */
4388 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
4390 switch (rs6000_tune
)
4392 case PROCESSOR_RS64A
:
4393 rs6000_cost
= &rs64a_cost
;
4396 case PROCESSOR_MPCCORE
:
4397 rs6000_cost
= &mpccore_cost
;
4400 case PROCESSOR_PPC403
:
4401 rs6000_cost
= &ppc403_cost
;
4404 case PROCESSOR_PPC405
:
4405 rs6000_cost
= &ppc405_cost
;
4408 case PROCESSOR_PPC440
:
4409 rs6000_cost
= &ppc440_cost
;
4412 case PROCESSOR_PPC476
:
4413 rs6000_cost
= &ppc476_cost
;
4416 case PROCESSOR_PPC601
:
4417 rs6000_cost
= &ppc601_cost
;
4420 case PROCESSOR_PPC603
:
4421 rs6000_cost
= &ppc603_cost
;
4424 case PROCESSOR_PPC604
:
4425 rs6000_cost
= &ppc604_cost
;
4428 case PROCESSOR_PPC604e
:
4429 rs6000_cost
= &ppc604e_cost
;
4432 case PROCESSOR_PPC620
:
4433 rs6000_cost
= &ppc620_cost
;
4436 case PROCESSOR_PPC630
:
4437 rs6000_cost
= &ppc630_cost
;
4440 case PROCESSOR_CELL
:
4441 rs6000_cost
= &ppccell_cost
;
4444 case PROCESSOR_PPC750
:
4445 case PROCESSOR_PPC7400
:
4446 rs6000_cost
= &ppc750_cost
;
4449 case PROCESSOR_PPC7450
:
4450 rs6000_cost
= &ppc7450_cost
;
4453 case PROCESSOR_PPC8540
:
4454 case PROCESSOR_PPC8548
:
4455 rs6000_cost
= &ppc8540_cost
;
4458 case PROCESSOR_PPCE300C2
:
4459 case PROCESSOR_PPCE300C3
:
4460 rs6000_cost
= &ppce300c2c3_cost
;
4463 case PROCESSOR_PPCE500MC
:
4464 rs6000_cost
= &ppce500mc_cost
;
4467 case PROCESSOR_PPCE500MC64
:
4468 rs6000_cost
= &ppce500mc64_cost
;
4471 case PROCESSOR_PPCE5500
:
4472 rs6000_cost
= &ppce5500_cost
;
4475 case PROCESSOR_PPCE6500
:
4476 rs6000_cost
= &ppce6500_cost
;
4479 case PROCESSOR_TITAN
:
4480 rs6000_cost
= &titan_cost
;
4483 case PROCESSOR_POWER4
:
4484 case PROCESSOR_POWER5
:
4485 rs6000_cost
= &power4_cost
;
4488 case PROCESSOR_POWER6
:
4489 rs6000_cost
= &power6_cost
;
4492 case PROCESSOR_POWER7
:
4493 rs6000_cost
= &power7_cost
;
4496 case PROCESSOR_POWER8
:
4497 rs6000_cost
= &power8_cost
;
4500 case PROCESSOR_POWER9
:
4501 case PROCESSOR_FUTURE
:
4502 rs6000_cost
= &power9_cost
;
4505 case PROCESSOR_PPCA2
:
4506 rs6000_cost
= &ppca2_cost
;
4515 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4516 param_simultaneous_prefetches
,
4517 rs6000_cost
->simultaneous_prefetches
);
4518 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4519 param_l1_cache_size
,
4520 rs6000_cost
->l1_cache_size
);
4521 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4522 param_l1_cache_line_size
,
4523 rs6000_cost
->cache_line_size
);
4524 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4525 param_l2_cache_size
,
4526 rs6000_cost
->l2_cache_size
);
4528 /* Increase loop peeling limits based on performance analysis. */
4529 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4530 param_max_peeled_insns
, 400);
4531 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4532 param_max_completely_peeled_insns
, 400);
4534 /* Use the 'model' -fsched-pressure algorithm by default. */
4535 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4536 param_sched_pressure_algorithm
,
4537 SCHED_PRESSURE_MODEL
);
4539 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
4540 turns -fweb and -frename-registers on. */
4541 if ((global_options_set
.x_flag_unroll_loops
&& flag_unroll_loops
)
4542 || (global_options_set
.x_flag_unroll_all_loops
4543 && flag_unroll_all_loops
))
4545 if (!global_options_set
.x_unroll_only_small_loops
)
4546 unroll_only_small_loops
= 0;
4547 if (!global_options_set
.x_flag_rename_registers
)
4548 flag_rename_registers
= 1;
4549 if (!global_options_set
.x_flag_web
)
4553 /* If using typedef char *va_list, signal that
4554 __builtin_va_start (&ap, 0) can be optimized to
4555 ap = __builtin_next_arg (0). */
4556 if (DEFAULT_ABI
!= ABI_V4
)
4557 targetm
.expand_builtin_va_start
= NULL
;
4560 /* If not explicitly specified via option, decide whether to generate indexed
4561 load/store instructions. A value of -1 indicates that the
4562 initial value of this variable has not been overwritten. During
4563 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4564 if (TARGET_AVOID_XFORM
== -1)
4565 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4566 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4567 need indexed accesses and the type used is the scalar type of the element
4568 being loaded or stored. */
4569 TARGET_AVOID_XFORM
= (rs6000_tune
== PROCESSOR_POWER6
&& TARGET_CMPB
4570 && !TARGET_ALTIVEC
);
4572 /* Set the -mrecip options. */
4573 if (rs6000_recip_name
)
4575 char *p
= ASTRDUP (rs6000_recip_name
);
4577 unsigned int mask
, i
;
4580 while ((q
= strtok (p
, ",")) != NULL
)
4591 if (!strcmp (q
, "default"))
4592 mask
= ((TARGET_RECIP_PRECISION
)
4593 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
4596 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4597 if (!strcmp (q
, recip_options
[i
].string
))
4599 mask
= recip_options
[i
].mask
;
4603 if (i
== ARRAY_SIZE (recip_options
))
4605 error ("unknown option for %<%s=%s%>", "-mrecip", q
);
4613 rs6000_recip_control
&= ~mask
;
4615 rs6000_recip_control
|= mask
;
4619 /* Set the builtin mask of the various options used that could affect which
4620 builtins were used. In the past we used target_flags, but we've run out
4621 of bits, and some options are no longer in target_flags. */
4622 rs6000_builtin_mask
= rs6000_builtin_mask_calculate ();
4623 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
4624 rs6000_print_builtin_options (stderr
, 0, "builtin mask",
4625 rs6000_builtin_mask
);
4627 /* Initialize all of the registers. */
4628 rs6000_init_hard_regno_mode_ok (global_init_p
);
4630 /* Save the initial options in case the user does function specific options */
4632 target_option_default_node
= target_option_current_node
4633 = build_target_option_node (&global_options
);
4635 /* If not explicitly specified via option, decide whether to generate the
4636 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4637 if (TARGET_LINK_STACK
== -1)
4638 SET_TARGET_LINK_STACK (rs6000_tune
== PROCESSOR_PPC476
&& flag_pic
);
4640 /* Deprecate use of -mno-speculate-indirect-jumps. */
4641 if (!rs6000_speculate_indirect_jumps
)
4642 warning (0, "%qs is deprecated and not recommended in any circumstances",
4643 "-mno-speculate-indirect-jumps");
4648 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4649 define the target cpu type. */
4652 rs6000_option_override (void)
4654 (void) rs6000_option_override_internal (true);
4658 /* Implement targetm.vectorize.builtin_mask_for_load. */
4660 rs6000_builtin_mask_for_load (void)
4662 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4663 if ((TARGET_ALTIVEC
&& !TARGET_VSX
)
4664 || (TARGET_VSX
&& !TARGET_EFFICIENT_UNALIGNED_VSX
))
4665 return altivec_builtin_mask_for_load
;
4670 /* Implement LOOP_ALIGN. */
4672 rs6000_loop_align (rtx label
)
4677 /* Don't override loop alignment if -falign-loops was specified. */
4678 if (!can_override_loop_align
)
4681 bb
= BLOCK_FOR_INSN (label
);
4682 ninsns
= num_loop_insns(bb
->loop_father
);
4684 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4685 if (ninsns
> 4 && ninsns
<= 8
4686 && (rs6000_tune
== PROCESSOR_POWER4
4687 || rs6000_tune
== PROCESSOR_POWER5
4688 || rs6000_tune
== PROCESSOR_POWER6
4689 || rs6000_tune
== PROCESSOR_POWER7
4690 || rs6000_tune
== PROCESSOR_POWER8
))
4691 return align_flags (5);
4696 /* Return true iff, data reference of TYPE can reach vector alignment (16)
4697 after applying N number of iterations. This routine does not determine
4698 how may iterations are required to reach desired alignment. */
4701 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
4708 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
4711 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
4721 /* Assuming that all other types are naturally aligned. CHECKME! */
4726 /* Return true if the vector misalignment factor is supported by the
4729 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
4736 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4739 /* Return if movmisalign pattern is not supported for this mode. */
4740 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
4743 if (misalignment
== -1)
4745 /* Misalignment factor is unknown at compile time but we know
4746 it's word aligned. */
4747 if (rs6000_vector_alignment_reachable (type
, is_packed
))
4749 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
4751 if (element_size
== 64 || element_size
== 32)
4758 /* VSX supports word-aligned vector. */
4759 if (misalignment
% 4 == 0)
4765 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4767 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
4768 tree vectype
, int misalign
)
4773 switch (type_of_cost
)
4781 case cond_branch_not_taken
:
4785 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4789 /* Power7 has only one permute unit, make it a bit expensive. */
4790 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
4795 case vec_promote_demote
:
4796 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4797 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
4802 case cond_branch_taken
:
4805 case unaligned_load
:
4806 case vector_gather_load
:
4807 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4808 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4811 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
4813 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
4815 /* Double word aligned. */
4823 /* Double word aligned. */
4827 /* Unknown misalignment. */
4840 /* Misaligned loads are not supported. */
4843 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4846 case unaligned_store
:
4847 case vector_scatter_store
:
4848 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4851 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
4853 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
4855 /* Double word aligned. */
4863 /* Double word aligned. */
4867 /* Unknown misalignment. */
4880 /* Misaligned stores are not supported. */
4886 /* This is a rough approximation assuming non-constant elements
4887 constructed into a vector via element insertion. FIXME:
4888 vec_construct is not granular enough for uniformly good
4889 decisions. If the initialization is a splat, this is
4890 cheaper than we estimate. Improve this someday. */
4891 elem_type
= TREE_TYPE (vectype
);
4892 /* 32-bit vectors loaded into registers are stored as double
4893 precision, so we need 2 permutes, 2 converts, and 1 merge
4894 to construct a vector of short floats from them. */
4895 if (SCALAR_FLOAT_TYPE_P (elem_type
)
4896 && TYPE_PRECISION (elem_type
) == 32)
4898 /* On POWER9, integer vector types are built up in GPRs and then
4899 use a direct move (2 cycles). For POWER8 this is even worse,
4900 as we need two direct moves and a merge, and the direct moves
4902 else if (INTEGRAL_TYPE_P (elem_type
))
4904 if (TARGET_P9_VECTOR
)
4905 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
4907 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 5;
4910 /* V2DFmode doesn't need a direct move. */
4918 /* Implement targetm.vectorize.preferred_simd_mode. */
4921 rs6000_preferred_simd_mode (scalar_mode mode
)
4923 opt_machine_mode vmode
= mode_for_vector (mode
, 16 / GET_MODE_SIZE (mode
));
4925 if (vmode
.exists () && !VECTOR_MEM_NONE_P (vmode
.require ()))
4926 return vmode
.require ();
/* Per-loop data tracked by the vectorizer cost hooks: the loop being
   costed and the accumulated cost for each vect_cost_model_location
   (prologue / body / epilogue — indexed by that enum, see
   rs6000_init_cost and rs6000_add_stmt_cost).  */
typedef struct _rs6000_cost_data
{
  struct loop *loop_info;
  unsigned cost[3];
} rs6000_cost_data;
4937 /* Test for likely overcommitment of vector hardware resources. If a
4938 loop iteration is relatively large, and too large a percentage of
4939 instructions in the loop are vectorized, the cost model may not
4940 adequately reflect delays from unavailable vector resources.
4941 Penalize the loop body cost for this case. */
4944 rs6000_density_test (rs6000_cost_data
*data
)
4946 const int DENSITY_PCT_THRESHOLD
= 85;
4947 const int DENSITY_SIZE_THRESHOLD
= 70;
4948 const int DENSITY_PENALTY
= 10;
4949 struct loop
*loop
= data
->loop_info
;
4950 basic_block
*bbs
= get_loop_body (loop
);
4951 int nbbs
= loop
->num_nodes
;
4952 loop_vec_info loop_vinfo
= loop_vec_info_for_loop (data
->loop_info
);
4953 int vec_cost
= data
->cost
[vect_body
], not_vec_cost
= 0;
4956 for (i
= 0; i
< nbbs
; i
++)
4958 basic_block bb
= bbs
[i
];
4959 gimple_stmt_iterator gsi
;
4961 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4963 gimple
*stmt
= gsi_stmt (gsi
);
4964 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
4966 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
4967 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
4973 density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
4975 if (density_pct
> DENSITY_PCT_THRESHOLD
4976 && vec_cost
+ not_vec_cost
> DENSITY_SIZE_THRESHOLD
)
4978 data
->cost
[vect_body
] = vec_cost
* (100 + DENSITY_PENALTY
) / 100;
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_NOTE
, vect_location
,
4981 "density %d%%, cost %d exceeds threshold, penalizing "
4982 "loop body cost by %d%%", density_pct
,
4983 vec_cost
+ not_vec_cost
, DENSITY_PENALTY
);
/* Implement targetm.vectorize.init_cost.  */

/* For each vectorized loop, this var holds TRUE iff a non-memory vector
   instruction is needed by the vectorization.  */
static bool rs6000_vect_nonmem;
4994 rs6000_init_cost (struct loop
*loop_info
)
4996 rs6000_cost_data
*data
= XNEW (struct _rs6000_cost_data
);
4997 data
->loop_info
= loop_info
;
4998 data
->cost
[vect_prologue
] = 0;
4999 data
->cost
[vect_body
] = 0;
5000 data
->cost
[vect_epilogue
] = 0;
5001 rs6000_vect_nonmem
= false;
5005 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5006 For some statement, we would like to further fine-grain tweak the cost on
5007 top of rs6000_builtin_vectorization_cost handling which doesn't have any
5008 information on statement operation codes etc. One typical case here is
5009 COND_EXPR, it takes the same cost to simple FXU instruction when evaluating
5010 for scalar cost, but it should be priced more whatever transformed to either
5011 compare + branch or compare + isel instructions. */
5014 adjust_vectorization_cost (enum vect_cost_for_stmt kind
,
5015 struct _stmt_vec_info
*stmt_info
)
5017 if (kind
== scalar_stmt
&& stmt_info
&& stmt_info
->stmt
5018 && gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
5020 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
5021 if (subcode
== COND_EXPR
)
5028 /* Implement targetm.vectorize.add_stmt_cost. */
5031 rs6000_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
5032 struct _stmt_vec_info
*stmt_info
, int misalign
,
5033 enum vect_cost_model_location where
)
5035 rs6000_cost_data
*cost_data
= (rs6000_cost_data
*) data
;
5036 unsigned retval
= 0;
5038 if (flag_vect_cost_model
)
5040 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
5041 int stmt_cost
= rs6000_builtin_vectorization_cost (kind
, vectype
,
5043 stmt_cost
+= adjust_vectorization_cost (kind
, stmt_info
);
5044 /* Statements in an inner loop relative to the loop being
5045 vectorized are weighted more heavily. The value here is
5046 arbitrary and could potentially be improved with analysis. */
5047 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
5048 count
*= 50; /* FIXME. */
5050 retval
= (unsigned) (count
* stmt_cost
);
5051 cost_data
->cost
[where
] += retval
;
5053 /* Check whether we're doing something other than just a copy loop.
5054 Not all such loops may be profitably vectorized; see
5055 rs6000_finish_cost. */
5056 if ((kind
== vec_to_scalar
|| kind
== vec_perm
5057 || kind
== vec_promote_demote
|| kind
== vec_construct
5058 || kind
== scalar_to_vec
)
5059 || (where
== vect_body
&& kind
== vector_stmt
))
5060 rs6000_vect_nonmem
= true;
5066 /* Implement targetm.vectorize.finish_cost. */
5069 rs6000_finish_cost (void *data
, unsigned *prologue_cost
,
5070 unsigned *body_cost
, unsigned *epilogue_cost
)
5072 rs6000_cost_data
*cost_data
= (rs6000_cost_data
*) data
;
5074 if (cost_data
->loop_info
)
5075 rs6000_density_test (cost_data
);
5077 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5078 that require versioning for any reason. The vectorization is at
5079 best a wash inside the loop, and the versioning checks make
5080 profitability highly unlikely and potentially quite harmful. */
5081 if (cost_data
->loop_info
)
5083 loop_vec_info vec_info
= loop_vec_info_for_loop (cost_data
->loop_info
);
5084 if (!rs6000_vect_nonmem
5085 && LOOP_VINFO_VECT_FACTOR (vec_info
) == 2
5086 && LOOP_REQUIRES_VERSIONING (vec_info
))
5087 cost_data
->cost
[vect_body
] += 10000;
5090 *prologue_cost
= cost_data
->cost
[vect_prologue
];
5091 *body_cost
= cost_data
->cost
[vect_body
];
5092 *epilogue_cost
= cost_data
->cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.
   Release the per-loop cost data allocated by rs6000_init_cost.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
5103 /* Implement targetm.loop_unroll_adjust. */
5106 rs6000_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
5108 if (unroll_only_small_loops
)
5110 /* TODO: This is hardcoded to 10 right now. It can be refined, for
5111 example we may want to unroll very small loops more times (4 perhaps).
5112 We also should use a PARAM for this. */
5113 if (loop
->ninsns
<= 10)
5114 return MIN (2, nunroll
);
5122 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5123 library with vectorized intrinsics. */
5126 rs6000_builtin_vectorized_libmass (combined_fn fn
, tree type_out
,
5130 const char *suffix
= NULL
;
5131 tree fntype
, new_fndecl
, bdecl
= NULL_TREE
;
5134 machine_mode el_mode
, in_mode
;
5137 /* Libmass is suitable for unsafe math only as it does not correctly support
5138 parts of IEEE with the required precision such as denormals. Only support
5139 it if we have VSX to use the simd d2 or f4 functions.
5140 XXX: Add variable length support. */
5141 if (!flag_unsafe_math_optimizations
|| !TARGET_VSX
)
5144 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5145 n
= TYPE_VECTOR_SUBPARTS (type_out
);
5146 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5147 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5148 if (el_mode
!= in_mode
5184 if (el_mode
== DFmode
&& n
== 2)
5186 bdecl
= mathfn_built_in (double_type_node
, fn
);
5187 suffix
= "d2"; /* pow -> powd2 */
5189 else if (el_mode
== SFmode
&& n
== 4)
5191 bdecl
= mathfn_built_in (float_type_node
, fn
);
5192 suffix
= "4"; /* powf -> powf4 */
5204 gcc_assert (suffix
!= NULL
);
5205 bname
= IDENTIFIER_POINTER (DECL_NAME (bdecl
));
5209 strcpy (name
, bname
+ sizeof ("__builtin_") - 1);
5210 strcat (name
, suffix
);
5213 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
5214 else if (n_args
== 2)
5215 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
5219 /* Build a function declaration for the vectorized function. */
5220 new_fndecl
= build_decl (BUILTINS_LOCATION
,
5221 FUNCTION_DECL
, get_identifier (name
), fntype
);
5222 TREE_PUBLIC (new_fndecl
) = 1;
5223 DECL_EXTERNAL (new_fndecl
) = 1;
5224 DECL_IS_NOVOPS (new_fndecl
) = 1;
5225 TREE_READONLY (new_fndecl
) = 1;
5230 /* Returns a function decl for a vectorized version of the builtin function
5231 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5232 if it is not available. */
5235 rs6000_builtin_vectorized_function (unsigned int fn
, tree type_out
,
5238 machine_mode in_mode
, out_mode
;
5241 if (TARGET_DEBUG_BUILTIN
)
5242 fprintf (stderr
, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5243 combined_fn_name (combined_fn (fn
)),
5244 GET_MODE_NAME (TYPE_MODE (type_out
)),
5245 GET_MODE_NAME (TYPE_MODE (type_in
)));
5247 if (TREE_CODE (type_out
) != VECTOR_TYPE
5248 || TREE_CODE (type_in
) != VECTOR_TYPE
)
5251 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5252 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
5253 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5254 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5259 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5260 && out_mode
== DFmode
&& out_n
== 2
5261 && in_mode
== DFmode
&& in_n
== 2)
5262 return rs6000_builtin_decls
[VSX_BUILTIN_CPSGNDP
];
5263 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5264 && out_mode
== SFmode
&& out_n
== 4
5265 && in_mode
== SFmode
&& in_n
== 4)
5266 return rs6000_builtin_decls
[VSX_BUILTIN_CPSGNSP
];
5267 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5268 && out_mode
== SFmode
&& out_n
== 4
5269 && in_mode
== SFmode
&& in_n
== 4)
5270 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_COPYSIGN_V4SF
];
5273 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5274 && out_mode
== DFmode
&& out_n
== 2
5275 && in_mode
== DFmode
&& in_n
== 2)
5276 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIP
];
5277 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5278 && out_mode
== SFmode
&& out_n
== 4
5279 && in_mode
== SFmode
&& in_n
== 4)
5280 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIP
];
5281 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5282 && out_mode
== SFmode
&& out_n
== 4
5283 && in_mode
== SFmode
&& in_n
== 4)
5284 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIP
];
5287 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5288 && out_mode
== DFmode
&& out_n
== 2
5289 && in_mode
== DFmode
&& in_n
== 2)
5290 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIM
];
5291 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5292 && out_mode
== SFmode
&& out_n
== 4
5293 && in_mode
== SFmode
&& in_n
== 4)
5294 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIM
];
5295 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5296 && out_mode
== SFmode
&& out_n
== 4
5297 && in_mode
== SFmode
&& in_n
== 4)
5298 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIM
];
5301 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5302 && out_mode
== DFmode
&& out_n
== 2
5303 && in_mode
== DFmode
&& in_n
== 2)
5304 return rs6000_builtin_decls
[VSX_BUILTIN_XVMADDDP
];
5305 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5306 && out_mode
== SFmode
&& out_n
== 4
5307 && in_mode
== SFmode
&& in_n
== 4)
5308 return rs6000_builtin_decls
[VSX_BUILTIN_XVMADDSP
];
5309 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5310 && out_mode
== SFmode
&& out_n
== 4
5311 && in_mode
== SFmode
&& in_n
== 4)
5312 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VMADDFP
];
5315 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5316 && out_mode
== DFmode
&& out_n
== 2
5317 && in_mode
== DFmode
&& in_n
== 2)
5318 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIZ
];
5319 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5320 && out_mode
== SFmode
&& out_n
== 4
5321 && in_mode
== SFmode
&& in_n
== 4)
5322 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIZ
];
5323 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5324 && out_mode
== SFmode
&& out_n
== 4
5325 && in_mode
== SFmode
&& in_n
== 4)
5326 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIZ
];
5329 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5330 && flag_unsafe_math_optimizations
5331 && out_mode
== DFmode
&& out_n
== 2
5332 && in_mode
== DFmode
&& in_n
== 2)
5333 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPI
];
5334 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5335 && flag_unsafe_math_optimizations
5336 && out_mode
== SFmode
&& out_n
== 4
5337 && in_mode
== SFmode
&& in_n
== 4)
5338 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPI
];
5341 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5342 && !flag_trapping_math
5343 && out_mode
== DFmode
&& out_n
== 2
5344 && in_mode
== DFmode
&& in_n
== 2)
5345 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIC
];
5346 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5347 && !flag_trapping_math
5348 && out_mode
== SFmode
&& out_n
== 4
5349 && in_mode
== SFmode
&& in_n
== 4)
5350 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIC
];
5356 /* Generate calls to libmass if appropriate. */
5357 if (rs6000_veclib_handler
)
5358 return rs6000_veclib_handler (combined_fn (fn
), type_out
, type_in
);
5363 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5366 rs6000_builtin_md_vectorized_function (tree fndecl
, tree type_out
,
5369 machine_mode in_mode
, out_mode
;
5372 if (TARGET_DEBUG_BUILTIN
)
5373 fprintf (stderr
, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5374 IDENTIFIER_POINTER (DECL_NAME (fndecl
)),
5375 GET_MODE_NAME (TYPE_MODE (type_out
)),
5376 GET_MODE_NAME (TYPE_MODE (type_in
)));
5378 if (TREE_CODE (type_out
) != VECTOR_TYPE
5379 || TREE_CODE (type_in
) != VECTOR_TYPE
)
5382 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5383 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
5384 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5385 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5387 enum rs6000_builtins fn
5388 = (enum rs6000_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
5391 case RS6000_BUILTIN_RSQRTF
:
5392 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
)
5393 && out_mode
== SFmode
&& out_n
== 4
5394 && in_mode
== SFmode
&& in_n
== 4)
5395 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRSQRTFP
];
5397 case RS6000_BUILTIN_RSQRT
:
5398 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5399 && out_mode
== DFmode
&& out_n
== 2
5400 && in_mode
== DFmode
&& in_n
== 2)
5401 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_2DF
];
5403 case RS6000_BUILTIN_RECIPF
:
5404 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
)
5405 && out_mode
== SFmode
&& out_n
== 4
5406 && in_mode
== SFmode
&& in_n
== 4)
5407 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRECIPFP
];
5409 case RS6000_BUILTIN_RECIP
:
5410 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5411 && out_mode
== DFmode
&& out_n
== 2
5412 && in_mode
== DFmode
&& in_n
== 2)
5413 return rs6000_builtin_decls
[VSX_BUILTIN_RECIP_V2DF
];
5421 /* Default CPU string for rs6000*_file_start functions. */
5422 static const char *rs6000_default_cpu
;
5424 #ifdef USING_ELFOS_H
5425 const char *rs6000_machine
;
5428 rs6000_machine_from_flags (void)
5430 HOST_WIDE_INT flags
= rs6000_isa_flags
;
5432 /* Disable the flags that should never influence the .machine selection. */
5433 flags
&= ~(OPTION_MASK_PPC_GFXOPT
| OPTION_MASK_PPC_GPOPT
);
5435 if ((flags
& (ISA_FUTURE_MASKS_SERVER
& ~ISA_3_0_MASKS_SERVER
)) != 0)
5437 if ((flags
& (ISA_3_0_MASKS_SERVER
& ~ISA_2_7_MASKS_SERVER
)) != 0)
5439 if ((flags
& (ISA_2_7_MASKS_SERVER
& ~ISA_2_6_MASKS_SERVER
)) != 0)
5441 if ((flags
& (ISA_2_6_MASKS_SERVER
& ~ISA_2_5_MASKS_SERVER
)) != 0)
5443 if ((flags
& (ISA_2_5_MASKS_SERVER
& ~ISA_2_4_MASKS
)) != 0)
5445 if ((flags
& (ISA_2_4_MASKS
& ~ISA_2_1_MASKS
)) != 0)
5447 if ((flags
& ISA_2_1_MASKS
) != 0)
5449 if ((flags
& OPTION_MASK_POWERPC64
) != 0)
5455 emit_asm_machine (void)
5457 fprintf (asm_out_file
, "\t.machine %s\n", rs6000_machine
);
5461 /* Do anything needed at the start of the asm file. */
5464 rs6000_file_start (void)
5467 const char *start
= buffer
;
5468 FILE *file
= asm_out_file
;
5470 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
5472 default_file_start ();
5474 if (flag_verbose_asm
)
5476 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
5478 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
5480 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
5484 if (global_options_set
.x_rs6000_cpu_index
)
5486 fprintf (file
, "%s -mcpu=%s", start
,
5487 processor_target_table
[rs6000_cpu_index
].name
);
5491 if (global_options_set
.x_rs6000_tune_index
)
5493 fprintf (file
, "%s -mtune=%s", start
,
5494 processor_target_table
[rs6000_tune_index
].name
);
5498 if (PPC405_ERRATUM77
)
5500 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
5504 #ifdef USING_ELFOS_H
5505 switch (rs6000_sdata
)
5507 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
5508 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
5509 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
5510 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
5513 if (rs6000_sdata
&& g_switch_value
)
5515 fprintf (file
, "%s -G %d", start
,
5525 #ifdef USING_ELFOS_H
5526 rs6000_machine
= rs6000_machine_from_flags ();
5527 emit_asm_machine ();
5530 if (DEFAULT_ABI
== ABI_ELFv2
)
5531 fprintf (file
, "\t.abiversion 2\n");
5535 /* Return nonzero if this function is known to have a null epilogue. */
5538 direct_return (void)
5540 if (reload_completed
)
5542 rs6000_stack_t
*info
= rs6000_stack_info ();
5544 if (info
->first_gp_reg_save
== 32
5545 && info
->first_fp_reg_save
== 64
5546 && info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1
5547 && ! info
->lr_save_p
5548 && ! info
->cr_save_p
5549 && info
->vrsave_size
== 0
5557 /* Helper for num_insns_constant. Calculate number of instructions to
5558 load VALUE to a single gpr using combinations of addi, addis, ori,
5559 oris and sldi instructions. */
5562 num_insns_constant_gpr (HOST_WIDE_INT value
)
5564 /* signed constant loadable with addi */
5565 if (SIGNED_INTEGER_16BIT_P (value
))
5568 /* constant loadable with addis */
5569 else if ((value
& 0xffff) == 0
5570 && (value
>> 31 == -1 || value
>> 31 == 0))
5573 /* PADDI can support up to 34 bit signed integers. */
5574 else if (TARGET_PREFIXED_ADDR
&& SIGNED_INTEGER_34BIT_P (value
))
5577 else if (TARGET_POWERPC64
)
5579 HOST_WIDE_INT low
= ((value
& 0xffffffff) ^ 0x80000000) - 0x80000000;
5580 HOST_WIDE_INT high
= value
>> 31;
5582 if (high
== 0 || high
== -1)
5588 return num_insns_constant_gpr (high
) + 1;
5590 return num_insns_constant_gpr (low
) + 1;
5592 return (num_insns_constant_gpr (high
)
5593 + num_insns_constant_gpr (low
) + 1);
5600 /* Helper for num_insns_constant. Allow constants formed by the
5601 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5602 and handle modes that require multiple gprs. */
5605 num_insns_constant_multi (HOST_WIDE_INT value
, machine_mode mode
)
5607 int nregs
= (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5611 HOST_WIDE_INT low
= sext_hwi (value
, BITS_PER_WORD
);
5612 int insns
= num_insns_constant_gpr (low
);
5614 /* We won't get more than 2 from num_insns_constant_gpr
5615 except when TARGET_POWERPC64 and mode is DImode or
5616 wider, so the register mode must be DImode. */
5617 && rs6000_is_valid_and_mask (GEN_INT (low
), DImode
))
5620 value
>>= BITS_PER_WORD
;
5625 /* Return the number of instructions it takes to form a constant in as
5626 many gprs are needed for MODE. */
5629 num_insns_constant (rtx op
, machine_mode mode
)
5633 switch (GET_CODE (op
))
5639 case CONST_WIDE_INT
:
5642 for (int i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
5643 insns
+= num_insns_constant_multi (CONST_WIDE_INT_ELT (op
, i
),
5650 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (op
);
5652 if (mode
== SFmode
|| mode
== SDmode
)
5657 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv
, l
);
5659 REAL_VALUE_TO_TARGET_SINGLE (*rv
, l
);
5660 /* See the first define_split in rs6000.md handling a
5661 const_double_operand. */
5665 else if (mode
== DFmode
|| mode
== DDmode
)
5670 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv
, l
);
5672 REAL_VALUE_TO_TARGET_DOUBLE (*rv
, l
);
5674 /* See the second (32-bit) and third (64-bit) define_split
5675 in rs6000.md handling a const_double_operand. */
5676 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 1] << 32;
5677 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffffUL
;
5680 else if (mode
== TFmode
|| mode
== TDmode
5681 || mode
== KFmode
|| mode
== IFmode
)
5687 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv
, l
);
5689 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv
, l
);
5691 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 3] << 32;
5692 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 2] & 0xffffffffUL
;
5693 insns
= num_insns_constant_multi (val
, DImode
);
5694 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 2 : 1] << 32;
5695 val
|= l
[WORDS_BIG_ENDIAN
? 3 : 0] & 0xffffffffUL
;
5696 insns
+= num_insns_constant_multi (val
, DImode
);
5708 return num_insns_constant_multi (val
, mode
);
5711 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5712 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5713 corresponding element of the vector, but for V4SFmode, the
5714 corresponding "float" is interpreted as an SImode integer. */
5717 const_vector_elt_as_int (rtx op
, unsigned int elt
)
5721 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5722 gcc_assert (GET_MODE (op
) != V2DImode
5723 && GET_MODE (op
) != V2DFmode
);
5725 tmp
= CONST_VECTOR_ELT (op
, elt
);
5726 if (GET_MODE (op
) == V4SFmode
)
5727 tmp
= gen_lowpart (SImode
, tmp
);
5728 return INTVAL (tmp
);
5731 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5732 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5733 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5734 all items are set to the same value and contain COPIES replicas of the
5735 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5736 operand and the others are set to the value of the operand's msb. */
5739 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
5741 machine_mode mode
= GET_MODE (op
);
5742 machine_mode inner
= GET_MODE_INNER (mode
);
5750 HOST_WIDE_INT splat_val
;
5751 HOST_WIDE_INT msb_val
;
5753 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
5756 nunits
= GET_MODE_NUNITS (mode
);
5757 bitsize
= GET_MODE_BITSIZE (inner
);
5758 mask
= GET_MODE_MASK (inner
);
5760 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
5762 msb_val
= val
>= 0 ? 0 : -1;
5764 /* Construct the value to be splatted, if possible. If not, return 0. */
5765 for (i
= 2; i
<= copies
; i
*= 2)
5767 HOST_WIDE_INT small_val
;
5769 small_val
= splat_val
>> bitsize
;
5771 if (splat_val
!= ((HOST_WIDE_INT
)
5772 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
5773 | (small_val
& mask
)))
5775 splat_val
= small_val
;
5778 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5779 if (EASY_VECTOR_15 (splat_val
))
5782 /* Also check if we can splat, and then add the result to itself. Do so if
5783 the value is positive, of if the splat instruction is using OP's mode;
5784 for splat_val < 0, the splat and the add should use the same mode. */
5785 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
5786 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
5789 /* Also check if are loading up the most significant bit which can be done by
5790 loading up -1 and shifting the value left by -1. */
5791 else if (EASY_VECTOR_MSB (splat_val
, inner
))
5797 /* Check if VAL is present in every STEP-th element, and the
5798 other elements are filled with its most significant bit. */
5799 for (i
= 1; i
< nunits
; ++i
)
5801 HOST_WIDE_INT desired_val
;
5802 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
5803 if ((i
& (step
- 1)) == 0)
5806 desired_val
= msb_val
;
5808 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
5815 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
5816 instruction, filling in the bottom elements with 0 or -1.
5818 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5819 for the number of zeroes to shift in, or negative for the number of 0xff
5822 OP is a CONST_VECTOR. */
5825 vspltis_shifted (rtx op
)
5827 machine_mode mode
= GET_MODE (op
);
5828 machine_mode inner
= GET_MODE_INNER (mode
);
5836 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
5839 /* We need to create pseudo registers to do the shift, so don't recognize
5840 shift vector constants after reload. */
5841 if (!can_create_pseudo_p ())
5844 nunits
= GET_MODE_NUNITS (mode
);
5845 mask
= GET_MODE_MASK (inner
);
5847 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
5849 /* Check if the value can really be the operand of a vspltis[bhw]. */
5850 if (EASY_VECTOR_15 (val
))
5853 /* Also check if we are loading up the most significant bit which can be done
5854 by loading up -1 and shifting the value left by -1. */
5855 else if (EASY_VECTOR_MSB (val
, inner
))
5861 /* Check if VAL is present in every STEP-th element until we find elements
5862 that are 0 or all 1 bits. */
5863 for (i
= 1; i
< nunits
; ++i
)
5865 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
5866 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
5868 /* If the value isn't the splat value, check for the remaining elements
5874 for (j
= i
+1; j
< nunits
; ++j
)
5876 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
5877 if (const_vector_elt_as_int (op
, elt2
) != 0)
5881 return (nunits
- i
) * GET_MODE_SIZE (inner
);
5884 else if ((elt_val
& mask
) == mask
)
5886 for (j
= i
+1; j
< nunits
; ++j
)
5888 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
5889 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
5893 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
5901 /* If all elements are equal, we don't need to do VLSDOI. */
5906 /* Return true if OP is of the given MODE and can be synthesized
5907 with a vspltisb, vspltish or vspltisw. */
5910 easy_altivec_constant (rtx op
, machine_mode mode
)
5912 unsigned step
, copies
;
5914 if (mode
== VOIDmode
)
5915 mode
= GET_MODE (op
);
5916 else if (mode
!= GET_MODE (op
))
5919 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5921 if (mode
== V2DFmode
)
5922 return zero_constant (op
, mode
);
5924 else if (mode
== V2DImode
)
5926 if (!CONST_INT_P (CONST_VECTOR_ELT (op
, 0))
5927 || !CONST_INT_P (CONST_VECTOR_ELT (op
, 1)))
5930 if (zero_constant (op
, mode
))
5933 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
5934 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
5940 /* V1TImode is a special container for TImode. Ignore for now. */
5941 else if (mode
== V1TImode
)
5944 /* Start with a vspltisw. */
5945 step
= GET_MODE_NUNITS (mode
) / 4;
5948 if (vspltis_constant (op
, step
, copies
))
5951 /* Then try with a vspltish. */
5957 if (vspltis_constant (op
, step
, copies
))
5960 /* And finally a vspltisb. */
5966 if (vspltis_constant (op
, step
, copies
))
5969 if (vspltis_shifted (op
) != 0)
5975 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5976 result is OP. Abort if it is not possible. */
5979 gen_easy_altivec_constant (rtx op
)
5981 machine_mode mode
= GET_MODE (op
);
5982 int nunits
= GET_MODE_NUNITS (mode
);
5983 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
5984 unsigned step
= nunits
/ 4;
5985 unsigned copies
= 1;
5987 /* Start with a vspltisw. */
5988 if (vspltis_constant (op
, step
, copies
))
5989 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
5991 /* Then try with a vspltish. */
5997 if (vspltis_constant (op
, step
, copies
))
5998 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6000 /* And finally a vspltisb. */
6006 if (vspltis_constant (op
, step
, copies
))
6007 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6012 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6013 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6015 Return the number of instructions needed (1 or 2) into the address pointed
6018 Return the constant that is being split via CONSTANT_PTR. */
6021 xxspltib_constant_p (rtx op
,
6026 size_t nunits
= GET_MODE_NUNITS (mode
);
6028 HOST_WIDE_INT value
;
6031 /* Set the returned values to out of bound values. */
6032 *num_insns_ptr
= -1;
6033 *constant_ptr
= 256;
6035 if (!TARGET_P9_VECTOR
)
6038 if (mode
== VOIDmode
)
6039 mode
= GET_MODE (op
);
6041 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6044 /* Handle (vec_duplicate <constant>). */
6045 if (GET_CODE (op
) == VEC_DUPLICATE
)
6047 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6048 && mode
!= V2DImode
)
6051 element
= XEXP (op
, 0);
6052 if (!CONST_INT_P (element
))
6055 value
= INTVAL (element
);
6056 if (!IN_RANGE (value
, -128, 127))
6060 /* Handle (const_vector [...]). */
6061 else if (GET_CODE (op
) == CONST_VECTOR
)
6063 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6064 && mode
!= V2DImode
)
6067 element
= CONST_VECTOR_ELT (op
, 0);
6068 if (!CONST_INT_P (element
))
6071 value
= INTVAL (element
);
6072 if (!IN_RANGE (value
, -128, 127))
6075 for (i
= 1; i
< nunits
; i
++)
6077 element
= CONST_VECTOR_ELT (op
, i
);
6078 if (!CONST_INT_P (element
))
6081 if (value
!= INTVAL (element
))
6086 /* Handle integer constants being loaded into the upper part of the VSX
6087 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6088 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6089 else if (CONST_INT_P (op
))
6091 if (!SCALAR_INT_MODE_P (mode
))
6094 value
= INTVAL (op
);
6095 if (!IN_RANGE (value
, -128, 127))
6098 if (!IN_RANGE (value
, -1, 0))
6100 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6103 if (EASY_VECTOR_15 (value
))
6111 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6112 sign extend. Special case 0/-1 to allow getting any VSX register instead
6113 of an Altivec register. */
6114 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6115 && EASY_VECTOR_15 (value
))
6118 /* Return # of instructions and the constant byte for XXSPLTIB. */
6119 if (mode
== V16QImode
)
6122 else if (IN_RANGE (value
, -1, 0))
6128 *constant_ptr
= (int) value
;
6133 output_vec_const_move (rtx
*operands
)
6141 mode
= GET_MODE (dest
);
6145 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
6146 int xxspltib_value
= 256;
6149 if (zero_constant (vec
, mode
))
6151 if (TARGET_P9_VECTOR
)
6152 return "xxspltib %x0,0";
6154 else if (dest_vmx_p
)
6155 return "vspltisw %0,0";
6158 return "xxlxor %x0,%x0,%x0";
6161 if (all_ones_constant (vec
, mode
))
6163 if (TARGET_P9_VECTOR
)
6164 return "xxspltib %x0,255";
6166 else if (dest_vmx_p
)
6167 return "vspltisw %0,-1";
6169 else if (TARGET_P8_VECTOR
)
6170 return "xxlorc %x0,%x0,%x0";
6176 if (TARGET_P9_VECTOR
6177 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
6181 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
6182 return "xxspltib %x0,%2";
6193 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
6194 if (zero_constant (vec
, mode
))
6195 return "vspltisw %0,0";
6197 if (all_ones_constant (vec
, mode
))
6198 return "vspltisw %0,-1";
6200 /* Do we need to construct a value using VSLDOI? */
6201 shift
= vspltis_shifted (vec
);
6205 splat_vec
= gen_easy_altivec_constant (vec
);
6206 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
6207 operands
[1] = XEXP (splat_vec
, 0);
6208 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
6211 switch (GET_MODE (splat_vec
))
6214 return "vspltisw %0,%1";
6217 return "vspltish %0,%1";
6220 return "vspltisb %0,%1";
6230 /* Initialize vector TARGET to VALS. */
6233 rs6000_expand_vector_init (rtx target
, rtx vals
)
6235 machine_mode mode
= GET_MODE (target
);
6236 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6237 int n_elts
= GET_MODE_NUNITS (mode
);
6238 int n_var
= 0, one_var
= -1;
6239 bool all_same
= true, all_const_zero
= true;
6243 for (i
= 0; i
< n_elts
; ++i
)
6245 x
= XVECEXP (vals
, 0, i
);
6246 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
6247 ++n_var
, one_var
= i
;
6248 else if (x
!= CONST0_RTX (inner_mode
))
6249 all_const_zero
= false;
6251 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6257 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6258 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
6259 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
6261 /* Zero register. */
6262 emit_move_insn (target
, CONST0_RTX (mode
));
6265 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
6267 /* Splat immediate. */
6268 emit_insn (gen_rtx_SET (target
, const_vec
));
6273 /* Load from constant pool. */
6274 emit_move_insn (target
, const_vec
);
6279 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6280 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
6284 size_t num_elements
= all_same
? 1 : 2;
6285 for (i
= 0; i
< num_elements
; i
++)
6287 op
[i
] = XVECEXP (vals
, 0, i
);
6288 /* Just in case there is a SUBREG with a smaller mode, do a
6290 if (GET_MODE (op
[i
]) != inner_mode
)
6292 rtx tmp
= gen_reg_rtx (inner_mode
);
6293 convert_move (tmp
, op
[i
], 0);
6296 /* Allow load with splat double word. */
6297 else if (MEM_P (op
[i
]))
6300 op
[i
] = force_reg (inner_mode
, op
[i
]);
6302 else if (!REG_P (op
[i
]))
6303 op
[i
] = force_reg (inner_mode
, op
[i
]);
6308 if (mode
== V2DFmode
)
6309 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
6311 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
6315 if (mode
== V2DFmode
)
6316 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
6318 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
6323 /* Special case initializing vector int if we are on 64-bit systems with
6324 direct move or we have the ISA 3.0 instructions. */
6325 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
6326 && TARGET_DIRECT_MOVE_64BIT
)
6330 rtx element0
= XVECEXP (vals
, 0, 0);
6331 if (MEM_P (element0
))
6332 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6334 element0
= force_reg (SImode
, element0
);
6336 if (TARGET_P9_VECTOR
)
6337 emit_insn (gen_vsx_splat_v4si (target
, element0
));
6340 rtx tmp
= gen_reg_rtx (DImode
);
6341 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
6342 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
6351 for (i
= 0; i
< 4; i
++)
6352 elements
[i
] = force_reg (SImode
, XVECEXP (vals
, 0, i
));
6354 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
6355 elements
[2], elements
[3]));
6360 /* With single precision floating point on VSX, know that internally single
6361 precision is actually represented as a double, and either make 2 V2DF
6362 vectors, and convert these vectors to single precision, or do one
6363 conversion, and splat the result to the other elements. */
6364 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
6368 rtx element0
= XVECEXP (vals
, 0, 0);
6370 if (TARGET_P9_VECTOR
)
6372 if (MEM_P (element0
))
6373 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6375 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
6380 rtx freg
= gen_reg_rtx (V4SFmode
);
6381 rtx sreg
= force_reg (SFmode
, element0
);
6382 rtx cvt
= (TARGET_XSCVDPSPN
6383 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
6384 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
6387 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
6393 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
6394 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
6395 rtx flt_even
= gen_reg_rtx (V4SFmode
);
6396 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
6397 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
6398 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
6399 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
6400 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
6402 /* Use VMRGEW if we can instead of doing a permute. */
6403 if (TARGET_P8_VECTOR
)
6405 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op2
));
6406 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op1
, op3
));
6407 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
6408 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
6409 if (BYTES_BIG_ENDIAN
)
6410 emit_insn (gen_p8_vmrgew_v4sf_direct (target
, flt_even
, flt_odd
));
6412 emit_insn (gen_p8_vmrgew_v4sf_direct (target
, flt_odd
, flt_even
));
6416 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
6417 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
6418 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
6419 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
6420 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
6426 /* Special case initializing vector short/char that are splats if we are on
6427 64-bit systems with direct move. */
6428 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
6429 && (mode
== V16QImode
|| mode
== V8HImode
))
6431 rtx op0
= XVECEXP (vals
, 0, 0);
6432 rtx di_tmp
= gen_reg_rtx (DImode
);
6435 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
6437 if (mode
== V16QImode
)
6439 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
6440 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
6444 if (mode
== V8HImode
)
6446 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
6447 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
6452 /* Store value to stack temp. Load vector element. Splat. However, splat
6453 of 64-bit items is not supported on Altivec. */
6454 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
6456 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
6457 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
6458 XVECEXP (vals
, 0, 0));
6459 x
= gen_rtx_UNSPEC (VOIDmode
,
6460 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
6461 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
6463 gen_rtx_SET (target
, mem
),
6465 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
6466 gen_rtx_PARALLEL (VOIDmode
,
6467 gen_rtvec (1, const0_rtx
)));
6468 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
6472 /* One field is non-constant. Load constant then overwrite
6476 rtx copy
= copy_rtx (vals
);
6478 /* Load constant part of vector, substitute neighboring value for
6480 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
6481 rs6000_expand_vector_init (target
, copy
);
6483 /* Insert variable. */
6484 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
), one_var
);
6488 /* Construct the vector in memory one field at a time
6489 and load the whole vector. */
6490 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
6491 for (i
= 0; i
< n_elts
; i
++)
6492 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
6493 i
* GET_MODE_SIZE (inner_mode
)),
6494 XVECEXP (vals
, 0, i
));
6495 emit_move_insn (target
, mem
);
6498 /* Set field ELT of TARGET to VAL. */
/* NOTE(review): this region is a lossy extraction -- braces and several
   original source lines are missing, and original line numbers are fused
   into the text.  Code tokens are untouched; only comments were added.  */
/* rs6000_expand_vector_set: store scalar VAL into lane ELT of vector
   register TARGET.  Prefers a dedicated VSX "set" insn when one exists
   for the mode; otherwise falls back to a stack temp + permute sequence
   (endian-dependent).  */
6501 rs6000_expand_vector_set (rtx target
, rtx val
, int elt
)
6503 machine_mode mode
= GET_MODE (target
);
6504 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6505 rtx reg
= gen_reg_rtx (mode
);
6507 int width
= GET_MODE_SIZE (inner_mode
);
/* VAL must live in a register for the insn patterns below.  */
6510 val
= force_reg (GET_MODE (val
), val
);
/* VSX path: try a single vector-set pattern keyed off the mode.  */
6512 if (VECTOR_MEM_VSX_P (mode
))
6514 rtx insn
= NULL_RTX
;
6515 rtx elt_rtx
= GEN_INT (elt
);
6517 if (mode
== V2DFmode
)
6518 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
6520 else if (mode
== V2DImode
)
6521 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
/* ISA 3.0 (Power9) on 64-bit adds set patterns for narrower modes.  */
6523 else if (TARGET_P9_VECTOR
&& TARGET_POWERPC64
)
6525 if (mode
== V4SImode
)
6526 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
6527 else if (mode
== V8HImode
)
6528 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
6529 else if (mode
== V16QImode
)
6530 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
6531 else if (mode
== V4SFmode
)
6532 insn
= gen_vsx_set_v4sf_p9 (target
, target
, val
, elt_rtx
);
/* NOTE(review): the code emitting INSN when non-null (orig lines
   6533-6541) was dropped by the extraction.  */
6542 /* Simplify setting single element vectors like V1TImode. */
6543 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
) && elt
== 0)
6545 emit_move_insn (target
, gen_lowpart (mode
, val
));
6549 /* Load single variable value. */
/* Spill VAL through a stack temp so it can be loaded into REG via an
   UNSPEC_LVE (lvewx-style) load.  */
6550 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
6551 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
6552 x
= gen_rtx_UNSPEC (VOIDmode
,
6553 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
6554 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
6556 gen_rtx_SET (reg
, mem
),
6559 /* Linear sequence. */
/* Build a 16-byte identity permute mask, then overwrite the WIDTH bytes
   of lane ELT with selectors 0x10..0x10+width-1 (i.e. take those bytes
   from the second permute operand).  */
6560 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
6561 for (i
= 0; i
< 16; ++i
)
6562 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
6564 /* Set permute mask to insert element into target. */
6565 for (i
= 0; i
< width
; ++i
)
6566 XVECEXP (mask
, 0, elt
*width
+ i
)
6567 = GEN_INT (i
+ 0x10);
6568 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
/* Endianness selects operand order for the permute UNSPEC; for LE with
   Power9 the operands are reversed relative to BE.  */
6570 if (BYTES_BIG_ENDIAN
)
6571 x
= gen_rtx_UNSPEC (mode
,
6572 gen_rtvec (3, target
, reg
,
6573 force_reg (V16QImode
, x
)),
6577 if (TARGET_P9_VECTOR
)
6578 x
= gen_rtx_UNSPEC (mode
,
6579 gen_rtvec (3, reg
, target
,
6580 force_reg (V16QImode
, x
)),
6584 /* Invert selector. We prefer to generate VNAND on P8 so
6585 that future fusion opportunities can kick in, but must
6586 generate VNOR elsewhere. */
/* Both IOR and AND of (not x, not x) equal NOT x; the choice only
   selects which machine insn (vnand vs vnor) is emitted.  */
6587 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
6588 rtx iorx
= (TARGET_P8_VECTOR
6589 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
6590 : gen_rtx_AND (V16QImode
, notx
, notx
));
6591 rtx tmp
= gen_reg_rtx (V16QImode
);
6592 emit_insn (gen_rtx_SET (tmp
, iorx
));
6594 /* Permute with operands reversed and adjusted selector. */
6595 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
6600 emit_insn (gen_rtx_SET (target
, x
));
6603 /* Extract field ELT from VEC into TARGET. */
/* NOTE(review): lossy extraction -- braces and several original lines
   (including the mode-switch scaffolding) are missing; only comments
   were added below.  */
/* Three strategies are visible: (1) constant ELT on VSX -> one extract
   insn per mode; (2) variable ELT on VSX with 64-bit direct move ->
   *_var extract patterns; (3) fallback -> spill VEC to a stack temp and
   load the addressed element.  */
6606 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
6608 machine_mode mode
= GET_MODE (vec
);
6609 machine_mode inner_mode
= GET_MODE_INNER (mode
);
/* Constant element number on a VSX-capable mode.  */
6612 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
6619 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
6622 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
6625 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
6628 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
6631 if (TARGET_DIRECT_MOVE_64BIT
)
6633 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
6639 if (TARGET_DIRECT_MOVE_64BIT
)
6641 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
6647 if (TARGET_DIRECT_MOVE_64BIT
)
6649 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
/* Variable element number: widen/force ELT into a DImode register, then
   use the *_var extract patterns.  */
6655 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
6656 && TARGET_DIRECT_MOVE_64BIT
)
6658 if (GET_MODE (elt
) != DImode
)
6660 rtx tmp
= gen_reg_rtx (DImode
);
6661 convert_move (tmp
, elt
, 0);
6664 else if (!REG_P (elt
))
6665 elt
= force_reg (DImode
, elt
);
6670 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
6674 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
6678 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
6682 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
6686 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
6690 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
6694 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
6702 /* Allocate mode-sized buffer. */
/* Memory fallback: store the whole vector, then read one element.  */
6703 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
6705 emit_move_insn (mem
, vec
);
6706 if (CONST_INT_P (elt
))
/* Modulo keeps a constant element number inside the vector bounds.  */
6708 int modulo_elt
= INTVAL (elt
) % GET_MODE_NUNITS (mode
);
6710 /* Add offset to field within buffer matching vector element. */
6711 mem
= adjust_address_nv (mem
, inner_mode
,
6712 modulo_elt
* GET_MODE_SIZE (inner_mode
));
6713 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
/* Variable element number: mask to the element count, scale to a byte
   offset, and index the buffer.  */
6717 unsigned int ele_size
= GET_MODE_SIZE (inner_mode
);
6718 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
6719 rtx new_addr
= gen_reg_rtx (Pmode
);
6721 elt
= gen_rtx_AND (Pmode
, elt
, num_ele_m1
);
6723 elt
= gen_rtx_MULT (Pmode
, elt
, GEN_INT (ele_size
));
6724 new_addr
= gen_rtx_PLUS (Pmode
, XEXP (mem
, 0), elt
);
6725 new_addr
= change_address (mem
, inner_mode
, new_addr
);
6726 emit_move_insn (target
, new_addr
);
6730 /* Return the offset within a memory object (MEM) of a vector type to a given
6731 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
6732 the element is constant, we return a constant integer.
6734 Otherwise, we use a base register temporary to calculate the offset after
6735 masking it to fit within the bounds of the vector and scaling it. The
6736 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
6737 built-in function. */
/* NOTE(review): lossy extraction -- the final return (presumably of
   BASE_TMP) and surrounding braces are missing; code tokens untouched.  */
6740 get_vector_offset (rtx mem
, rtx element
, rtx base_tmp
, unsigned scalar_size
)
/* Fast path: a constant element gives a constant byte offset.  */
6742 if (CONST_INT_P (element
))
6743 return GEN_INT (INTVAL (element
) * scalar_size
);
6745 /* All insns should use the 'Q' constraint (address is a single register) if
6746 the element number is not a constant. */
6747 rtx addr
= XEXP (mem
, 0);
6748 gcc_assert (satisfies_constraint_Q (addr
));
6750 /* Mask the element to make sure the element number is between 0 and the
6751 maximum number of elements - 1 so that we don't generate an address
6752 outside the vector. */
6753 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (GET_MODE (mem
)) - 1);
6754 rtx and_op
= gen_rtx_AND (Pmode
, element
, num_ele_m1
);
6755 emit_insn (gen_rtx_SET (base_tmp
, and_op
));
6757 /* Shift the element to get the byte offset from the element number. */
/* SCALAR_SIZE must be a power of two for the shift to be exact.  */
6758 int shift
= exact_log2 (scalar_size
);
6759 gcc_assert (shift
>= 0);
6763 rtx shift_op
= gen_rtx_ASHIFT (Pmode
, base_tmp
, GEN_INT (shift
));
6764 emit_insn (gen_rtx_SET (base_tmp
, shift_op
));
6770 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6771 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6772 temporary (BASE_TMP) to fixup the address. Return the new memory address
6773 that is valid for reads or writes to a given register (SCALAR_REG).
6775 This function is expected to be called after reload is completed when we are
6776 splitting insns. The temporary BASE_TMP might be set multiple times with
/* NOTE(review): lossy extraction -- the parameter lines for MEM and
   ELEMENT (orig 6781-6783) and several braces are missing; only comments
   were added.  */
6780 rs6000_adjust_vec_address (rtx scalar_reg
,
6784 machine_mode scalar_mode
)
6786 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
6787 rtx addr
= XEXP (mem
, 0);
/* BASE_TMP is clobbered below, so it must not feed the address or the
   element expression.  */
6790 gcc_assert (!reg_mentioned_p (base_tmp
, addr
));
6791 gcc_assert (!reg_mentioned_p (base_tmp
, element
));
6793 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6794 gcc_assert (GET_RTX_CLASS (GET_CODE (addr
)) != RTX_AUTOINC
);
6796 /* Calculate what we need to add to the address to get the element
6798 rtx element_offset
= get_vector_offset (mem
, element
, base_tmp
, scalar_size
);
6800 /* Create the new address pointing to the element within the vector. If we
6801 are adding 0, we don't have to change the address. */
6802 if (element_offset
== const0_rtx
)
6805 /* A simple indirect address can be converted into a reg + offset
6807 else if (REG_P (addr
) || SUBREG_P (addr
))
6808 new_addr
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
6810 /* Optimize D-FORM addresses with constant offset with a constant element, to
6811 include the element offset in the address directly. */
6812 else if (GET_CODE (addr
) == PLUS
)
6814 rtx op0
= XEXP (addr
, 0);
6815 rtx op1
= XEXP (addr
, 1);
6817 gcc_assert (REG_P (op0
) || SUBREG_P (op0
));
6818 if (CONST_INT_P (op1
) && CONST_INT_P (element_offset
))
6820 /* op0 should never be r0, because r0+offset is not valid. But it
6821 doesn't hurt to make sure it is not r0. */
6822 gcc_assert (reg_or_subregno (op0
) != 0);
6824 /* D-FORM address with constant element number. */
/* Fold both constant parts into a single displacement.  */
6825 HOST_WIDE_INT offset
= INTVAL (op1
) + INTVAL (element_offset
);
6826 rtx offset_rtx
= GEN_INT (offset
);
6827 new_addr
= gen_rtx_PLUS (Pmode
, op0
, offset_rtx
);
6831 /* If we don't have a D-FORM address with a constant element number,
6832 add the two elements in the current address. Then add the offset.
6834 Previously, we tried to add the offset to OP1 and change the
6835 address to an X-FORM format adding OP0 and BASE_TMP, but it became
6836 complicated because we had to verify that op1 was not GPR0 and we
6837 had a constant element offset (due to the way ADDI is defined).
6838 By doing the add of OP0 and OP1 first, and then adding in the
6839 offset, it has the benefit that if D-FORM instructions are
6840 allowed, the offset is part of the memory access to the vector
6842 emit_insn (gen_rtx_SET (base_tmp
, gen_rtx_PLUS (Pmode
, op0
, op1
)));
6843 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
/* Catch-all: force the whole original address into BASE_TMP first.  */
6849 emit_move_insn (base_tmp
, addr
);
6850 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
6853 /* If the address isn't valid, move the address into the temporary base
6854 register. Some reasons it could not be valid include:
6856 The address offset overflowed the 16 or 34 bit offset size;
6857 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
6858 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
6859 Only X_FORM loads can be done, and the address is D_FORM. */
6861 enum insn_form iform
6862 = address_to_insn_form (new_addr
, scalar_mode
,
6863 reg_to_non_prefixed (scalar_reg
, scalar_mode
));
6865 if (iform
== INSN_FORM_BAD
)
6867 emit_move_insn (base_tmp
, new_addr
);
6868 new_addr
= base_tmp
;
6871 return change_address (mem
, scalar_mode
, new_addr
);
6874 /* Split a variable vec_extract operation into the component instructions. */
/* NOTE(review): lossy extraction -- the final parameter (presumably
   TMP_ALTIVEC, orig line 6878), braces and a number of interior lines
   are missing.  Code tokens untouched; comments only.  */
/* DEST receives element ELEMENT (a variable) of vector SRC, using GPR
   temp TMP_GPR (and, per the asserts below, an Altivec temp) as
   scratch.  Three visible strategies: memory operand -> reload just the
   element; Power9 VEXTU{B,H,W}{L,R}X for int destinations; otherwise a
   VSLO-based shift of the element into the top lane.  */
6877 rs6000_split_vec_extract_var (rtx dest
, rtx src
, rtx element
, rtx tmp_gpr
,
6880 machine_mode mode
= GET_MODE (src
);
6881 machine_mode scalar_mode
= GET_MODE_INNER (GET_MODE (src
));
6882 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
6883 int byte_shift
= exact_log2 (scalar_size
);
6885 gcc_assert (byte_shift
>= 0);
6887 /* If we are given a memory address, optimize to load just the element. We
6888 don't have to adjust the vector element number on little endian
6892 emit_move_insn (dest
,
6893 rs6000_adjust_vec_address (dest
, src
, element
, tmp_gpr
,
6898 else if (REG_P (src
) || SUBREG_P (src
))
6900 int num_elements
= GET_MODE_NUNITS (mode
);
6901 int bits_in_element
= mode_to_bits (GET_MODE_INNER (mode
));
6902 int bit_shift
= 7 - exact_log2 (num_elements
);
6904 unsigned int dest_regno
= reg_or_subregno (dest
);
6905 unsigned int src_regno
= reg_or_subregno (src
);
6906 unsigned int element_regno
= reg_or_subregno (element
);
6908 gcc_assert (REG_P (tmp_gpr
));
6910 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6911 a general purpose register. */
6912 if (TARGET_P9_VECTOR
6913 && (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
6914 && INT_REGNO_P (dest_regno
)
6915 && ALTIVEC_REGNO_P (src_regno
)
6916 && INT_REGNO_P (element_regno
))
6918 rtx dest_si
= gen_rtx_REG (SImode
, dest_regno
);
6919 rtx element_si
= gen_rtx_REG (SImode
, element_regno
);
/* Byte extract: element index is already a byte number.  */
6921 if (mode
== V16QImode
)
6922 emit_insn (BYTES_BIG_ENDIAN
6923 ? gen_vextublx (dest_si
, element_si
, src
)
6924 : gen_vextubrx (dest_si
, element_si
, src
));
/* Halfword extract: scale the index by 2 (shift left 1) first.  */
6926 else if (mode
== V8HImode
)
6928 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
6929 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const1_rtx
));
6930 emit_insn (BYTES_BIG_ENDIAN
6931 ? gen_vextuhlx (dest_si
, tmp_gpr_si
, src
)
6932 : gen_vextuhrx (dest_si
, tmp_gpr_si
, src
));
/* Word extract (V4SI branch -- header lost in extraction): scale by 4.  */
6938 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
6939 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const2_rtx
));
6940 emit_insn (BYTES_BIG_ENDIAN
6941 ? gen_vextuwlx (dest_si
, tmp_gpr_si
, src
)
6942 : gen_vextuwrx (dest_si
, tmp_gpr_si
, src
));
6949 gcc_assert (REG_P (tmp_altivec
));
6951 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
6952 an XOR, otherwise we need to subtract. The shift amount is so VSLO
6953 will shift the element into the upper position (adding 3 to convert a
6954 byte shift into a bit shift). */
6955 if (scalar_size
== 8)
6957 if (!BYTES_BIG_ENDIAN
)
/* Two elements: LE index flip is just XOR with 1.  */
6959 emit_insn (gen_xordi3 (tmp_gpr
, element
, const1_rtx
));
6965 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
6967 emit_insn (gen_rtx_SET (tmp_gpr
,
6968 gen_rtx_AND (DImode
,
6969 gen_rtx_ASHIFT (DImode
,
/* Narrower elements on LE: mask the index then subtract from N-1.  */
6976 if (!BYTES_BIG_ENDIAN
)
6978 rtx num_ele_m1
= GEN_INT (num_elements
- 1);
6980 emit_insn (gen_anddi3 (tmp_gpr
, element
, num_ele_m1
));
6981 emit_insn (gen_subdi3 (tmp_gpr
, num_ele_m1
, tmp_gpr
));
6987 emit_insn (gen_ashldi3 (tmp_gpr
, element2
, GEN_INT (bit_shift
)));
6990 /* Get the value into the lower byte of the Altivec register where VSLO
6992 if (TARGET_P9_VECTOR
)
6993 emit_insn (gen_vsx_splat_v2di (tmp_altivec
, tmp_gpr
));
6994 else if (can_create_pseudo_p ())
6995 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_gpr
, tmp_gpr
));
6998 rtx tmp_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
6999 emit_move_insn (tmp_di
, tmp_gpr
);
7000 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_di
, tmp_di
));
7003 /* Do the VSLO to get the value into the final location. */
7007 emit_insn (gen_vsx_vslo_v2df (dest
, src
, tmp_altivec
));
7011 emit_insn (gen_vsx_vslo_v2di (dest
, src
, tmp_altivec
));
/* V4SF result (branch header lost): shift then convert single->double.  */
7016 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7017 rtx tmp_altivec_v4sf
= gen_rtx_REG (V4SFmode
, REGNO (tmp_altivec
));
7018 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
7019 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
7022 emit_insn (gen_vsx_xscvspdp_scalar2 (dest
, tmp_altivec_v4sf
));
/* Integer result narrower than 64 bits (branch header lost): VSLO, move
   to a GPR, then shift right to drop the unused low bits.  */
7030 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7031 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
7032 rtx tmp_gpr_di
= gen_rtx_REG (DImode
, REGNO (dest
));
7033 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
7035 emit_move_insn (tmp_gpr_di
, tmp_altivec_di
);
7036 emit_insn (gen_lshrdi3 (tmp_gpr_di
, tmp_gpr_di
,
7037 GEN_INT (64 - bits_in_element
)));
7051 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7052 selects whether the alignment is abi mandated, optional, or
7053 both abi and optional alignment. */
/* NOTE(review): lossy extraction -- braces and the return statement are
   missing.  Visible logic: ABI-mandated pass bumps vector types to (at
   least) 128 bits; the optional pass word-aligns QImode (char) arrays.  */
7056 rs6000_data_alignment (tree type
, unsigned int align
, enum data_align how
)
7058 if (how
!= align_opt
)
7060 if (TREE_CODE (type
) == VECTOR_TYPE
&& align
< 128)
7064 if (how
!= align_abi
)
7066 if (TREE_CODE (type
) == ARRAY_TYPE
7067 && TYPE_MODE (TREE_TYPE (type
)) == QImode
)
7069 if (align
< BITS_PER_WORD
)
7070 align
= BITS_PER_WORD
;
7077 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7078 instructions simply ignore the low bits; VSX memory instructions
7079 are aligned to 4 or 8 bytes. */
/* True when an unaligned access of MODE at ALIGN bits should be treated
   as slow: always under STRICT_ALIGNMENT; otherwise, without efficient
   unaligned VSX, for scalar FP below word alignment or vector/IEEE-128
   modes below their natural vector alignment.  */
7082 rs6000_slow_unaligned_access (machine_mode mode
, unsigned int align
)
7084 return (STRICT_ALIGNMENT
7085 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7086 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && align
< 32)
7087 || ((VECTOR_MODE_P (mode
) || FLOAT128_VECTOR_P (mode
))
7088 && (int) align
< VECTOR_ALIGN (mode
)))));
7091 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
/* NOTE(review): lossy extraction -- braces, the `warned' declaration and
   the return statements are missing.  Visible behavior: on Altivec, a
   vector field whose computed alignment differs from 128 triggers a
   one-time -Wpsabi note about the GCC 5 layout change.  */
7094 rs6000_special_adjust_field_align_p (tree type
, unsigned int computed
)
7096 if (TARGET_ALTIVEC
&& TREE_CODE (type
) == VECTOR_TYPE
)
7098 if (computed
!= 128)
7101 if (!warned
&& warn_psabi
)
7104 inform (input_location
,
7105 "the layout of aggregates containing vectors with"
7106 " %d-byte alignment has changed in GCC 5",
7107 computed
/ BITS_PER_UNIT
);
7110 /* In current GCC there is no special case. */
7117 /* AIX increases natural record alignment to doubleword if the first
7118 field is an FP double while the FP fields remain word aligned. */
/* NOTE(review): lossy extraction -- braces and the final return of ALIGN
   are missing.  Looks through leading non-FIELD_DECLs and array
   dimensions of the first field; a DFmode first member raises the record
   alignment to at least 64 bits.  */
7121 rs6000_special_round_type_align (tree type
, unsigned int computed
,
7122 unsigned int specified
)
7124 unsigned int align
= MAX (computed
, specified
);
7125 tree field
= TYPE_FIELDS (type
);
7127 /* Skip all non field decls */
7128 while (field
!= NULL
&& TREE_CODE (field
) != FIELD_DECL
)
7129 field
= DECL_CHAIN (field
);
7131 if (field
!= NULL
&& field
!= type
)
7133 type
= TREE_TYPE (field
);
/* Strip array dimensions to reach the element type.  */
7134 while (TREE_CODE (type
) == ARRAY_TYPE
)
7135 type
= TREE_TYPE (type
);
7137 if (type
!= error_mark_node
&& TYPE_MODE (type
) == DFmode
)
7138 align
= MAX (align
, 64);
7144 /* Darwin increases record alignment to the natural alignment of
/* NOTE(review): lossy extraction -- the rest of this header comment,
   braces and the `do' opener of the loop (whose closing `} while' is
   still visible) are missing.  Descends into the first field of nested
   aggregates; a packed record or packed first field contributes no extra
   alignment, otherwise the first scalar member's natural alignment is
   folded into ALIGN.  */
7148 darwin_rs6000_special_round_type_align (tree type
, unsigned int computed
,
7149 unsigned int specified
)
7151 unsigned int align
= MAX (computed
, specified
);
7153 if (TYPE_PACKED (type
))
7156 /* Find the first field, looking down into aggregates. */
7158 tree field
= TYPE_FIELDS (type
);
7159 /* Skip all non field decls */
7160 while (field
!= NULL
&& TREE_CODE (field
) != FIELD_DECL
)
7161 field
= DECL_CHAIN (field
);
7164 /* A packed field does not contribute any extra alignment. */
7165 if (DECL_PACKED (field
))
7167 type
= TREE_TYPE (field
);
7168 while (TREE_CODE (type
) == ARRAY_TYPE
)
7169 type
= TREE_TYPE (type
);
7170 } while (AGGREGATE_TYPE_P (type
));
7172 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
)
7173 align
= MAX (align
, TYPE_ALIGN (type
));
7178 /* Return 1 for an operand in small memory on V.4/eabi. */
/* NOTE(review): lossy extraction -- the #if/#else scaffolding, braces and
   early returns are missing.  Visible logic: only meaningful for the V.4
   ABI with -msdata enabled; accepts a SYMBOL_REF directly, or
   (const (plus sym const_int)) whose offset is within g_switch_value,
   finally testing SYMBOL_REF_SMALL_P.  */
7181 small_data_operand (rtx op ATTRIBUTE_UNUSED
,
7182 machine_mode mode ATTRIBUTE_UNUSED
)
7187 if (rs6000_sdata
== SDATA_NONE
|| rs6000_sdata
== SDATA_DATA
)
7190 if (DEFAULT_ABI
!= ABI_V4
)
7193 if (SYMBOL_REF_P (op
))
7196 else if (GET_CODE (op
) != CONST
7197 || GET_CODE (XEXP (op
, 0)) != PLUS
7198 || !SYMBOL_REF_P (XEXP (XEXP (op
, 0), 0))
7199 || !CONST_INT_P (XEXP (XEXP (op
, 0), 1)))
7204 rtx sum
= XEXP (op
, 0);
7205 HOST_WIDE_INT summand
;
7207 /* We have to be careful here, because it is the referenced address
7208 that must be 32k from _SDA_BASE_, not just the symbol. */
7209 summand
= INTVAL (XEXP (sum
, 1));
7210 if (summand
< 0 || summand
> g_switch_value
)
7213 sym_ref
= XEXP (sum
, 0);
7216 return SYMBOL_REF_SMALL_P (sym_ref
);
7222 /* Return true if either operand is a general purpose register. */
/* Used by move patterns to detect GPR involvement on either side.  */
7225 gpr_or_gpr_p (rtx op0
, rtx op1
)
7227 return ((REG_P (op0
) && INT_REGNO_P (REGNO (op0
)))
7228 || (REG_P (op1
) && INT_REGNO_P (REGNO (op1
))));
7231 /* Return true if this is a move direct operation between GPR registers and
7232 floating point/VSX registers. */
/* NOTE(review): lossy extraction -- braces and the `return' keywords on
   the taken branches are missing.  Requires both operands to be hard
   registers with direct-move support, one GPR and one VSX (either
   direction).  */
7235 direct_move_p (rtx op0
, rtx op1
)
7237 if (!REG_P (op0
) || !REG_P (op1
))
7240 if (!TARGET_DIRECT_MOVE
)
7243 int regno0
= REGNO (op0
);
7244 int regno1
= REGNO (op1
);
7245 if (!HARD_REGISTER_NUM_P (regno0
) || !HARD_REGISTER_NUM_P (regno1
))
7248 if (INT_REGNO_P (regno0
) && VSX_REGNO_P (regno1
))
7251 if (VSX_REGNO_P (regno0
) && INT_REGNO_P (regno1
))
7257 /* Return true if the ADDR is an acceptable address for a quad memory
7258 operation of mode MODE (either LQ/STQ for general purpose registers, or
7259 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
7260 is intended for LQ/STQ. If it is false, the address is intended for the ISA
7261 3.0 LXV/STXV instruction. */
/* NOTE(review): lossy extraction -- braces and early-return bodies are
   missing.  Accepts 16-byte modes only; indirect addresses, valid
   prefixed addresses, or reg + DQ-aligned constant offset.  */
7264 quad_address_p (rtx addr
, machine_mode mode
, bool strict
)
7268 if (GET_MODE_SIZE (mode
) != 16)
7271 if (legitimate_indirect_address_p (addr
, strict
))
7274 if (VECTOR_MODE_P (mode
) && !mode_supports_dq_form (mode
))
7277 /* Is this a valid prefixed address? If the bottom four bits of the offset
7278 are non-zero, we could use a prefixed instruction (which does not have the
7279 DQ-form constraint that the traditional instruction had) instead of
7280 forcing the unaligned offset to a GPR. */
7281 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DQ
))
7284 if (GET_CODE (addr
) != PLUS
)
7287 op0
= XEXP (addr
, 0);
7288 if (!REG_P (op0
) || !INT_REG_OK_FOR_BASE_P (op0
, strict
))
7291 op1
= XEXP (addr
, 1);
7292 if (!CONST_INT_P (op1
))
/* Final check: offset must satisfy the DQ-form (16-byte) constraint.  */
7295 return quad_address_offset_p (INTVAL (op1
));
7298 /* Return true if this is a load or store quad operation. This function does
7299 not handle the atomic quad memory instructions. */
/* NOTE(review): lossy extraction -- braces, the `ret' declaration and the
   final `return ret;' are missing.  Recognizes reg<-mem (load quad) and
   mem<-reg (store quad) shapes, requiring a quad int register pair, a
   quad-capable memory operand, and (for loads) no overlap between the
   destination and the address.  */
7302 quad_load_store_p (rtx op0
, rtx op1
)
7306 if (!TARGET_QUAD_MEMORY
)
7309 else if (REG_P (op0
) && MEM_P (op1
))
7310 ret
= (quad_int_reg_operand (op0
, GET_MODE (op0
))
7311 && quad_memory_operand (op1
, GET_MODE (op1
))
7312 && !reg_overlap_mentioned_p (op0
, op1
));
7314 else if (MEM_P (op0
) && REG_P (op1
))
7315 ret
= (quad_memory_operand (op0
, GET_MODE (op0
))
7316 && quad_int_reg_operand (op1
, GET_MODE (op1
)));
/* Optional debug trace gated on -mdebug=addr.  */
7321 if (TARGET_DEBUG_ADDR
)
7323 fprintf (stderr
, "\n========== quad_load_store, return %s\n",
7324 ret
? "true" : "false");
7325 debug_rtx (gen_rtx_SET (op0
, op1
));
7331 /* Given an address, return a constant offset term if one exists. */
/* NOTE(review): lossy extraction -- the per-case bodies (stripping one
   level of PRE_*/LO_SUM/CONST/PLUS and the final returns) are missing;
   only the dispatch skeleton survives.  Presumably peels wrappers until
   a CONST_INT is found, else returns NULL_RTX -- confirm against the
   original file.  */
7334 address_offset (rtx op
)
7336 if (GET_CODE (op
) == PRE_INC
7337 || GET_CODE (op
) == PRE_DEC
)
7339 else if (GET_CODE (op
) == PRE_MODIFY
7340 || GET_CODE (op
) == LO_SUM
)
7343 if (GET_CODE (op
) == CONST
)
7346 if (GET_CODE (op
) == PLUS
)
7349 if (CONST_INT_P (op
))
7355 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7356 the mode. If we can't find (or don't know) the alignment of the symbol
7357 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7358 should be pessimistic]. Offsets are validated in the same way as for
/* NOTE(review): lossy extraction -- braces, the `sym' and `align'
   declarations, several returns and the mode-switch scaffolding are
   missing.  Visible structure: unwrap CONST/Mach-O PIC unspec, classify
   sym/sym+offset/const-int/label, derive symbol alignment from its decl,
   then apply mode-specific offset+alignment restrictions before the
   final 16-bit high-part wrap check.  */
7361 darwin_rs6000_legitimate_lo_sum_const_p (rtx x
, machine_mode mode
)
7363 /* We should not get here with this. */
7364 gcc_checking_assert (! mode_supports_dq_form (mode
));
7366 if (GET_CODE (x
) == CONST
)
7369 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
7370 x
= XVECEXP (x
, 0, 0);
7373 unsigned HOST_WIDE_INT offset
= 0;
7375 if (GET_CODE (x
) == PLUS
)
7378 if (! SYMBOL_REF_P (sym
))
7380 if (!CONST_INT_P (XEXP (x
, 1)))
7382 offset
= INTVAL (XEXP (x
, 1));
7384 else if (SYMBOL_REF_P (x
))
7386 else if (CONST_INT_P (x
))
7387 offset
= INTVAL (x
);
7388 else if (GET_CODE (x
) == LABEL_REF
)
7389 offset
= 0; // We assume code labels are Pmode aligned
7391 return false; // not sure what we have here.
7393 /* If we don't know the alignment of the thing to which the symbol refers,
7394 we assume optimistically it is "enough".
7395 ??? maybe we should be pessimistic instead. */
7400 tree decl
= SYMBOL_REF_DECL (sym
);
7402 if (MACHO_SYMBOL_INDIRECTION_P (sym
))
7403 /* The decl in an indirection symbol is the original one, which might
7404 be less aligned than the indirection. Our indirections are always
7409 if (decl
&& DECL_ALIGN (decl
))
7410 align
= DECL_ALIGN_UNIT (decl
);
7413 unsigned int extra
= 0;
7419 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7421 if (VECTOR_MEM_VSX_P (mode
))
/* DS-form cases: both the offset and the alignment must be 4-byte
   multiples on 32-bit (the surrounding mode cases were dropped).  */
7424 if (!TARGET_POWERPC64
)
7426 else if ((offset
& 3) || (align
& 3))
7437 if (!TARGET_POWERPC64
)
7439 else if ((offset
& 3) || (align
& 3))
7447 /* We only care if the access(es) would cause a change to the high part. */
7448 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
7449 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
7452 /* Return true if the MEM operand is a memory operand suitable for use
7453 with a (full width, possibly multiple) gpr load/store. On
7454 powerpc64 this means the offset must be divisible by 4.
7455 Implements 'Y' constraint.
7457 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7458 a constraint function we know the operand has satisfied a suitable
7461 Offsetting a lo_sum should not be allowed, except where we know by
7462 alignment that a 32k boundary is not crossed. Note that by
7463 "offsetting" here we mean a further offset to access parts of the
7464 MEM. It's fine to have a lo_sum where the inner address is offset
7465 from a sym, since the same sym+offset will appear in the high part
7466 of the address calculation. */
/* NOTE(review): lossy extraction -- braces, the `extra' declaration and
   several early returns are missing; code tokens untouched.  */
7469 mem_operand_gpr (rtx op
, machine_mode mode
)
7471 unsigned HOST_WIDE_INT offset
;
7473 rtx addr
= XEXP (op
, 0);
7475 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7477 && (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
7478 && mode_supports_pre_incdec_p (mode
)
7479 && legitimate_indirect_address_p (XEXP (addr
, 0), false))
7482 /* Allow prefixed instructions if supported. If the bottom two bits of the
7483 offset are non-zero, we could use a prefixed instruction (which does not
7484 have the DS-form constraint that the traditional instruction had) instead
7485 of forcing the unaligned offset to a GPR. */
7486 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
7489 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7490 really OK. Doing this early avoids teaching all the other machinery
7492 if (TARGET_MACHO
&& GET_CODE (addr
) == LO_SUM
)
7493 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr
, 1), mode
);
7495 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7496 if (!rs6000_offsettable_memref_p (op
, mode
, false))
7499 op
= address_offset (addr
);
7503 offset
= INTVAL (op
);
/* 64-bit ld/std are DS-form: the low 2 bits of the offset must be 0.  */
7504 if (TARGET_POWERPC64
&& (offset
& 3) != 0)
/* EXTRA is the size beyond one word that the (possibly multi-register)
   access also touches; the last word accessed must stay in range.  */
7507 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
7511 if (GET_CODE (addr
) == LO_SUM
)
7512 /* For lo_sum addresses, we must allow any offset except one that
7513 causes a wrap, so test only the low 16 bits. */
7514 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
7516 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
7519 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7520 enforce an offset divisible by 4 even for 32-bit. */
/* NOTE(review): lossy extraction -- braces, the `extra' declaration and
   several early returns are missing; logic parallels mem_operand_gpr but
   applies the 4-byte offset alignment unconditionally.  */
7523 mem_operand_ds_form (rtx op
, machine_mode mode
)
7525 unsigned HOST_WIDE_INT offset
;
7527 rtx addr
= XEXP (op
, 0);
7529 /* Allow prefixed instructions if supported. If the bottom two bits of the
7530 offset are non-zero, we could use a prefixed instruction (which does not
7531 have the DS-form constraint that the traditional instruction had) instead
7532 of forcing the unaligned offset to a GPR. */
7533 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
7536 if (!offsettable_address_p (false, mode
, addr
))
7539 op
= address_offset (addr
);
7543 offset
= INTVAL (op
);
/* DS-form: low 2 bits of the displacement must always be zero.  */
7544 if ((offset
& 3) != 0)
7547 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
7551 if (GET_CODE (addr
) == LO_SUM
)
7552 /* For lo_sum addresses, we must allow any offset except one that
7553 causes a wrap, so test only the low 16 bits. */
7554 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
7556 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
7559 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
/* NOTE(review): lossy extraction -- the function's leading cases (orig
   lines 7563-7575), braces and final return are missing.  Visible parts:
   AltiVec/VSX modes allow reg+offset only when DQ-form addressing is
   available; SDmode is restricted when direct load/store is in use.  */
7562 reg_offset_addressing_ok_p (machine_mode mode
)
7576 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7577 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7578 a vector mode, if we want to use the VSX registers to move it around,
7579 we need to restrict ourselves to reg+reg addressing. Similarly for
7580 IEEE 128-bit floating point that is passed in a single vector
7582 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
7583 return mode_supports_dq_form (mode
);
7587 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7588 addressing for the LFIWZX and STFIWX instructions. */
7589 if (TARGET_NO_SDMODE_STACK
)
/* Return true if OP (a register, or reg+const_int PLUS) refers to one of
   GCC's virtual stack registers (arg pointer, frame area pointers, etc.)
   before they are instantiated.  NOTE(review): lossy extraction -- the
   REG_P case header, the `regnum' declaration and the fallback branch
   are missing.  */
7601 virtual_stack_registers_memory_p (rtx op
)
7606 regnum
= REGNO (op
);
7608 else if (GET_CODE (op
) == PLUS
7609 && REG_P (XEXP (op
, 0))
7610 && CONST_INT_P (XEXP (op
, 1)))
7611 regnum
= REGNO (XEXP (op
, 0));
7616 return (regnum
>= FIRST_VIRTUAL_REGISTER
7617 && regnum
<= LAST_VIRTUAL_POINTER_REGISTER
);
7620 /* Return true if a MODE sized memory accesses to OP plus OFFSET
7621 is known to not straddle a 32k boundary. This function is used
7622 to determine whether -mcmodel=medium code can use TOC pointer
7623 relative addressing for OP. This means the alignment of the TOC
7624 pointer must also be taken into account, and unfortunately that is
/* NOTE(review): lossy extraction -- the remainder of this comment, braces
   and several interior lines (including the BLKmode/mask logic around
   orig 7707-7711) are missing; code tokens untouched.  */
7627 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7628 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
/* Alignment knowledge is derived from, in order: section-anchor block
   info, the constant pool entry's mode, or the symbol's decl.  */
7632 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
7636 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
7638 if (!SYMBOL_REF_P (op
))
7641 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7643 if (mode_supports_dq_form (mode
))
7646 dsize
= GET_MODE_SIZE (mode
);
7647 decl
= SYMBOL_REF_DECL (op
);
7653 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7654 replacing memory addresses with an anchor plus offset. We
7655 could find the decl by rummaging around in the block->objects
7656 VEC for the given offset but that seems like too much work. */
7657 dalign
= BITS_PER_UNIT
;
7658 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
7659 && SYMBOL_REF_ANCHOR_P (op
)
7660 && SYMBOL_REF_BLOCK (op
) != NULL
)
7662 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
7664 dalign
= block
->alignment
;
7665 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
7667 else if (CONSTANT_POOL_ADDRESS_P (op
))
7669 /* It would be nice to have get_pool_align().. */
7670 machine_mode cmode
= get_pool_mode (op
);
7672 dalign
= GET_MODE_ALIGNMENT (cmode
);
7675 else if (DECL_P (decl
))
7677 dalign
= DECL_ALIGN (decl
);
7681 /* Allow BLKmode when the entire object is known to not
7682 cross a 32k boundary. */
7683 if (!DECL_SIZE_UNIT (decl
))
7686 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
7689 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
/* Cap the usable alignment by what the TOC pointer itself guarantees.  */
7693 dalign
/= BITS_PER_UNIT
;
7694 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
7695 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
7696 return dalign
>= dsize
;
7702 /* Find how many bits of the alignment we know for this access. */
7703 dalign
/= BITS_PER_UNIT
;
7704 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
7705 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
/* LSB isolates the lowest set bit of OFFSET (its alignment).  */
7707 lsb
= offset
& -offset
;
7711 return dalign
>= dsize
;
/* Return true if OP is (a symbol plus optional constant offset that
   resolves to) a constant-pool address whose entry qualifies for
   special TOC output.  NOTE(review): lossy extraction -- the `base' and
   `offset' declarations and braces are missing.  */
7715 constant_pool_expr_p (rtx op
)
7719 split_const (op
, &base
, &offset
);
7720 return (SYMBOL_REF_P (base
)
7721 && CONSTANT_POOL_ADDRESS_P (base
)
7722 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base
), Pmode
));
7725 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7726 use that as the register to put the HIGH value into if register allocation
/* NOTE(review): lossy extraction -- the tail of this comment, braces and
   a few lines (e.g. the debug_rtx/small-model return around orig
   7745-7756) are missing.  Builds (unspec [sym, r2] UNSPEC_TOCREL); for
   medium/large code models returns (lo_sum (high tocrel) tocrel), with
   the HIGH part optionally moved into LARGETOC_REG.  */
7730 create_TOC_reference (rtx symbol
, rtx largetoc_reg
)
7732 rtx tocrel
, tocreg
, hi
;
7734 gcc_assert (TARGET_TOC
);
7736 if (TARGET_DEBUG_ADDR
)
7738 if (SYMBOL_REF_P (symbol
))
7739 fprintf (stderr
, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7743 fprintf (stderr
, "\ncreate_TOC_reference, code %s:\n",
7744 GET_RTX_NAME (GET_CODE (symbol
)));
/* After reload the TOC register must be marked live explicitly.  */
7749 if (!can_create_pseudo_p ())
7750 df_set_regs_ever_live (TOC_REGISTER
, true);
7752 tocreg
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
7753 tocrel
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, symbol
, tocreg
), UNSPEC_TOCREL
);
7754 if (TARGET_CMODEL
== CMODEL_SMALL
|| can_create_pseudo_p ())
7757 hi
= gen_rtx_HIGH (Pmode
, copy_rtx (tocrel
));
7758 if (largetoc_reg
!= NULL
)
7760 emit_move_insn (largetoc_reg
, hi
);
7763 return gen_rtx_LO_SUM (Pmode
, hi
, tocrel
);
7766 /* These are only used to pass through from print_operand/print_operand_address
7767 to rs6000_output_addr_const_extra over the intervening function
7768 output_addr_const which is not target code. */
/* File-scope side channel; not thread-safe, but output routines run
   single-threaded.  */
7769 static const_rtx tocrel_base_oac
, tocrel_offset_oac
;
7771 /* Return true if OP is a toc pointer relative address (the output
7772 of create_TOC_reference). If STRICT, do not match non-split
7773 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7774 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7775 TOCREL_OFFSET_RET respectively. */
/* NOTE(review): lossy extraction -- braces and a few lines (e.g. the
   LO_SUM unwrapping bodies around orig 7785-7798) are missing; code
   tokens untouched.  */
7778 toc_relative_expr_p (const_rtx op
, bool strict
, const_rtx
*tocrel_base_ret
,
7779 const_rtx
*tocrel_offset_ret
)
7784 if (TARGET_CMODEL
!= CMODEL_SMALL
)
7786 /* When strict ensure we have everything tidy. */
7788 && !(GET_CODE (op
) == LO_SUM
7789 && REG_P (XEXP (op
, 0))
7790 && INT_REG_OK_FOR_BASE_P (XEXP (op
, 0), strict
)))
7793 /* When not strict, allow non-split TOC addresses and also allow
7794 (lo_sum (high ..)) TOC addresses created during reload. */
7795 if (GET_CODE (op
) == LO_SUM
)
7799 const_rtx tocrel_base
= op
;
7800 const_rtx tocrel_offset
= const0_rtx
;
/* Split an optional (plus tocrel const_int) into base + offset.  */
7802 if (GET_CODE (op
) == PLUS
&& add_cint_operand (XEXP (op
, 1), GET_MODE (op
)))
7804 tocrel_base
= XEXP (op
, 0);
7805 tocrel_offset
= XEXP (op
, 1);
7808 if (tocrel_base_ret
)
7809 *tocrel_base_ret
= tocrel_base
;
7810 if (tocrel_offset_ret
)
7811 *tocrel_offset_ret
= tocrel_offset
;
/* A genuine TOC reference is (unspec [sym, r2] UNSPEC_TOCREL).  */
7813 return (GET_CODE (tocrel_base
) == UNSPEC
7814 && XINT (tocrel_base
, 1) == UNSPEC_TOCREL
7815 && REG_P (XVECEXP (tocrel_base
, 0, 1))
7816 && REGNO (XVECEXP (tocrel_base
, 0, 1)) == TOC_REGISTER
);
7819 /* Return true if X is a constant pool address, and also for cmodel=medium
7820 if X is a toc-relative address known to be offsettable within MODE. */
/* NOTE(review): lossy extraction -- the `strict' parameter line and a
   condition line (orig 7830) are missing.  For cmodel=medium the
   toc-relative symbol must additionally be a constant pool entry or be
   provably offsettable by alignment.  */
7823 legitimate_constant_pool_address_p (const_rtx x
, machine_mode mode
,
7826 const_rtx tocrel_base
, tocrel_offset
;
7827 return (toc_relative_expr_p (x
, strict
, &tocrel_base
, &tocrel_offset
)
7828 && (TARGET_CMODEL
!= CMODEL_MEDIUM
7829 || constant_pool_expr_p (XVECEXP (tocrel_base
, 0, 0))
7831 || offsettable_ok_by_alignment (XVECEXP (tocrel_base
, 0, 0),
7832 INTVAL (tocrel_offset
), mode
)));
7836 legitimate_small_data_p (machine_mode mode
, rtx x
)
7838 return (DEFAULT_ABI
== ABI_V4
7839 && !flag_pic
&& !TARGET_TOC
7840 && (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
)
7841 && small_data_operand (x
, mode
));
7845 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
7846 bool strict
, bool worst_case
)
7848 unsigned HOST_WIDE_INT offset
;
7851 if (GET_CODE (x
) != PLUS
)
7853 if (!REG_P (XEXP (x
, 0)))
7855 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
7857 if (mode_supports_dq_form (mode
))
7858 return quad_address_p (x
, mode
, strict
);
7859 if (!reg_offset_addressing_ok_p (mode
))
7860 return virtual_stack_registers_memory_p (x
);
7861 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
7863 if (!CONST_INT_P (XEXP (x
, 1)))
7866 offset
= INTVAL (XEXP (x
, 1));
7873 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7875 if (VECTOR_MEM_VSX_P (mode
))
7880 if (!TARGET_POWERPC64
)
7882 else if (offset
& 3)
7895 if (!TARGET_POWERPC64
)
7897 else if (offset
& 3)
7905 if (TARGET_PREFIXED_ADDR
)
7906 return SIGNED_34BIT_OFFSET_EXTRA_P (offset
, extra
);
7908 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
7912 legitimate_indexed_address_p (rtx x
, int strict
)
7916 if (GET_CODE (x
) != PLUS
)
7922 return (REG_P (op0
) && REG_P (op1
)
7923 && ((INT_REG_OK_FOR_BASE_P (op0
, strict
)
7924 && INT_REG_OK_FOR_INDEX_P (op1
, strict
))
7925 || (INT_REG_OK_FOR_BASE_P (op1
, strict
)
7926 && INT_REG_OK_FOR_INDEX_P (op0
, strict
))));
7930 avoiding_indexed_address_p (machine_mode mode
)
7932 /* Avoid indexed addressing for modes that have non-indexed
7933 load/store instruction forms. */
7934 return (TARGET_AVOID_XFORM
&& VECTOR_MEM_NONE_P (mode
));
7938 legitimate_indirect_address_p (rtx x
, int strict
)
7940 return REG_P (x
) && INT_REG_OK_FOR_BASE_P (x
, strict
);
7944 macho_lo_sum_memory_operand (rtx x
, machine_mode mode
)
7946 if (!TARGET_MACHO
|| !flag_pic
7947 || mode
!= SImode
|| !MEM_P (x
))
7951 if (GET_CODE (x
) != LO_SUM
)
7953 if (!REG_P (XEXP (x
, 0)))
7955 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 0))
7959 return CONSTANT_P (x
);
7963 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
7965 if (GET_CODE (x
) != LO_SUM
)
7967 if (!REG_P (XEXP (x
, 0)))
7969 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
7971 /* quad word addresses are restricted, and we can't use LO_SUM. */
7972 if (mode_supports_dq_form (mode
))
7976 if (TARGET_ELF
|| TARGET_MACHO
)
7980 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
7982 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
7983 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
7984 recognizes some LO_SUM addresses as valid although this
7985 function says opposite. In most cases, LRA through different
7986 transformations can generate correct code for address reloads.
7987 It cannot manage only some LO_SUM cases. So we need to add
7988 code here saying that some addresses are still valid. */
7989 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
7990 && small_toc_ref (x
, VOIDmode
));
7991 if (TARGET_TOC
&& ! large_toc_ok
)
7993 if (GET_MODE_NUNITS (mode
) != 1)
7995 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
7996 && !(/* ??? Assume floating point reg based on mode? */
7997 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
8000 return CONSTANT_P (x
) || large_toc_ok
;
8007 /* Try machine-dependent ways of modifying an illegitimate address
8008 to be legitimate. If we find one, return the new, valid address.
8009 This is used from only one place: `memory_address' in explow.c.
8011 OLDX is the address as it was before break_out_memory_refs was
8012 called. In some cases it is useful to look at this to decide what
8015 It is always safe for this function to do nothing. It exists to
8016 recognize opportunities to optimize the output.
8018 On RS/6000, first check for the sum of a register with a constant
8019 integer that is out of range. If so, generate code to add the
8020 constant with the low-order 16 bits masked to the register and force
8021 this result into another register (this can be done with `cau').
8022 Then generate an address of REG+(CONST&0xffff), allowing for the
8023 possibility of bit 16 being a one.
8025 Then check for the sum of a register and something not constant, try to
8026 load the other things into a register and return the sum. */
8029 rs6000_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
8034 if (!reg_offset_addressing_ok_p (mode
)
8035 || mode_supports_dq_form (mode
))
8037 if (virtual_stack_registers_memory_p (x
))
8040 /* In theory we should not be seeing addresses of the form reg+0,
8041 but just in case it is generated, optimize it away. */
8042 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
8043 return force_reg (Pmode
, XEXP (x
, 0));
8045 /* For TImode with load/store quad, restrict addresses to just a single
8046 pointer, so it works with both GPRs and VSX registers. */
8047 /* Make sure both operands are registers. */
8048 else if (GET_CODE (x
) == PLUS
8049 && (mode
!= TImode
|| !TARGET_VSX
))
8050 return gen_rtx_PLUS (Pmode
,
8051 force_reg (Pmode
, XEXP (x
, 0)),
8052 force_reg (Pmode
, XEXP (x
, 1)));
8054 return force_reg (Pmode
, x
);
8056 if (SYMBOL_REF_P (x
))
8058 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
8060 return rs6000_legitimize_tls_address (x
, model
);
8072 /* As in legitimate_offset_address_p we do not assume
8073 worst-case. The mode here is just a hint as to the registers
8074 used. A TImode is usually in gprs, but may actually be in
8075 fprs. Leave worst-case scenario for reload to handle via
8076 insn constraints. PTImode is only GPRs. */
8083 if (GET_CODE (x
) == PLUS
8084 && REG_P (XEXP (x
, 0))
8085 && CONST_INT_P (XEXP (x
, 1))
8086 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
8087 >= 0x10000 - extra
))
8089 HOST_WIDE_INT high_int
, low_int
;
8091 low_int
= ((INTVAL (XEXP (x
, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8092 if (low_int
>= 0x8000 - extra
)
8094 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
8095 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
8096 GEN_INT (high_int
)), 0);
8097 return plus_constant (Pmode
, sum
, low_int
);
8099 else if (GET_CODE (x
) == PLUS
8100 && REG_P (XEXP (x
, 0))
8101 && !CONST_INT_P (XEXP (x
, 1))
8102 && GET_MODE_NUNITS (mode
) == 1
8103 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
8104 || (/* ??? Assume floating point reg based on mode? */
8105 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
8106 && !avoiding_indexed_address_p (mode
))
8108 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
8109 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
8111 else if ((TARGET_ELF
8113 || !MACHO_DYNAMIC_NO_PIC_P
8117 && TARGET_NO_TOC_OR_PCREL
8120 && !CONST_WIDE_INT_P (x
)
8121 && !CONST_DOUBLE_P (x
)
8123 && GET_MODE_NUNITS (mode
) == 1
8124 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
8125 || (/* ??? Assume floating point reg based on mode? */
8126 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
))))
8128 rtx reg
= gen_reg_rtx (Pmode
);
8130 emit_insn (gen_elf_high (reg
, x
));
8132 emit_insn (gen_macho_high (Pmode
, reg
, x
));
8133 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
8137 && constant_pool_expr_p (x
)
8138 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
8139 return create_TOC_reference (x
, NULL_RTX
);
8144 /* Debug version of rs6000_legitimize_address. */
8146 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
8152 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
8153 insns
= get_insns ();
8159 "\nrs6000_legitimize_address: mode %s, old code %s, "
8160 "new code %s, modified\n",
8161 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
8162 GET_RTX_NAME (GET_CODE (ret
)));
8164 fprintf (stderr
, "Original address:\n");
8167 fprintf (stderr
, "oldx:\n");
8170 fprintf (stderr
, "New address:\n");
8175 fprintf (stderr
, "Insns added:\n");
8176 debug_rtx_list (insns
, 20);
8182 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8183 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
8194 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8195 We need to emit DTP-relative relocations. */
8197 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
8199 rs6000_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
8204 fputs ("\t.long\t", file
);
8207 fputs (DOUBLE_INT_ASM_OP
, file
);
8212 output_addr_const (file
, x
);
8214 fputs ("@dtprel+0x8000", file
);
8215 else if (TARGET_XCOFF
&& SYMBOL_REF_P (x
))
8217 switch (SYMBOL_REF_TLS_MODEL (x
))
8221 case TLS_MODEL_LOCAL_EXEC
:
8222 fputs ("@le", file
);
8224 case TLS_MODEL_INITIAL_EXEC
:
8225 fputs ("@ie", file
);
8227 case TLS_MODEL_GLOBAL_DYNAMIC
:
8228 case TLS_MODEL_LOCAL_DYNAMIC
:
8237 /* Return true if X is a symbol that refers to real (rather than emulated)
8241 rs6000_real_tls_symbol_ref_p (rtx x
)
8243 return (SYMBOL_REF_P (x
)
8244 && SYMBOL_REF_TLS_MODEL (x
) >= TLS_MODEL_REAL
);
8247 /* In the name of slightly smaller debug output, and to cater to
8248 general assembler lossage, recognize various UNSPEC sequences
8249 and turn them back into a direct symbol reference. */
8252 rs6000_delegitimize_address (rtx orig_x
)
8256 if (GET_CODE (orig_x
) == UNSPEC
&& XINT (orig_x
, 1) == UNSPEC_FUSION_GPR
)
8257 orig_x
= XVECEXP (orig_x
, 0, 0);
8259 orig_x
= delegitimize_mem_from_attrs (orig_x
);
8266 if (TARGET_CMODEL
!= CMODEL_SMALL
&& GET_CODE (y
) == LO_SUM
)
8270 if (GET_CODE (y
) == PLUS
8271 && GET_MODE (y
) == Pmode
8272 && CONST_INT_P (XEXP (y
, 1)))
8274 offset
= XEXP (y
, 1);
8278 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_TOCREL
)
8280 y
= XVECEXP (y
, 0, 0);
8283 /* Do not associate thread-local symbols with the original
8284 constant pool symbol. */
8287 && CONSTANT_POOL_ADDRESS_P (y
)
8288 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
8292 if (offset
!= NULL_RTX
)
8293 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
8294 if (!MEM_P (orig_x
))
8297 return replace_equiv_address_nv (orig_x
, y
);
8301 && GET_CODE (orig_x
) == LO_SUM
8302 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
8304 y
= XEXP (XEXP (orig_x
, 1), 0);
8305 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
8306 return XVECEXP (y
, 0, 0);
8312 /* Return true if X shouldn't be emitted into the debug info.
8313 The linker doesn't like .toc section references from
8314 .debug_* sections, so reject .toc section symbols. */
8317 rs6000_const_not_ok_for_debug_p (rtx x
)
8319 if (GET_CODE (x
) == UNSPEC
)
8321 if (SYMBOL_REF_P (x
)
8322 && CONSTANT_POOL_ADDRESS_P (x
))
8324 rtx c
= get_pool_constant (x
);
8325 machine_mode cmode
= get_pool_mode (x
);
8326 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
8333 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8336 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
8338 int icode
= INSN_CODE (insn
);
8340 /* Reject creating doloop insns. Combine should not be allowed
8341 to create these for a number of reasons:
8342 1) In a nested loop, if combine creates one of these in an
8343 outer loop and the register allocator happens to allocate ctr
8344 to the outer loop insn, then the inner loop can't use ctr.
8345 Inner loops ought to be more highly optimized.
8346 2) Combine often wants to create one of these from what was
8347 originally a three insn sequence, first combining the three
8348 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8349 allocated ctr, the splitter takes use back to the three insn
8350 sequence. It's better to stop combine at the two insn
8352 3) Faced with not being able to allocate ctr for ctrsi/crtdi
8353 insns, the register allocator sometimes uses floating point
8354 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8355 jump insn and output reloads are not implemented for jumps,
8356 the ctrsi/ctrdi splitters need to handle all possible cases.
8357 That's a pain, and it gets to be seriously difficult when a
8358 splitter that runs after reload needs memory to transfer from
8359 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8360 for the difficult case. It's better to not create problems
8361 in the first place. */
8362 if (icode
!= CODE_FOR_nothing
8363 && (icode
== CODE_FOR_bdz_si
8364 || icode
== CODE_FOR_bdz_di
8365 || icode
== CODE_FOR_bdnz_si
8366 || icode
== CODE_FOR_bdnz_di
8367 || icode
== CODE_FOR_bdztf_si
8368 || icode
== CODE_FOR_bdztf_di
8369 || icode
== CODE_FOR_bdnztf_si
8370 || icode
== CODE_FOR_bdnztf_di
))
8376 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8378 static GTY(()) rtx rs6000_tls_symbol
;
8380 rs6000_tls_get_addr (void)
8382 if (!rs6000_tls_symbol
)
8383 rs6000_tls_symbol
= init_one_libfunc ("__tls_get_addr");
8385 return rs6000_tls_symbol
;
8388 /* Construct the SYMBOL_REF for TLS GOT references. */
8390 static GTY(()) rtx rs6000_got_symbol
;
8392 rs6000_got_sym (void)
8394 if (!rs6000_got_symbol
)
8396 rs6000_got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
8397 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_LOCAL
;
8398 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_EXTERNAL
;
8401 return rs6000_got_symbol
;
8404 /* AIX Thread-Local Address support. */
8407 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
8409 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
, tlsaddr
;
8413 name
= XSTR (addr
, 0);
8414 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8415 or the symbol will be in TLS private data section. */
8416 if (name
[strlen (name
) - 1] != ']'
8417 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr
))
8418 || bss_initializer_p (SYMBOL_REF_DECL (addr
))))
8420 tlsname
= XALLOCAVEC (char, strlen (name
) + 4);
8421 strcpy (tlsname
, name
);
8423 bss_initializer_p (SYMBOL_REF_DECL (addr
)) ? "[UL]" : "[TL]");
8424 tlsaddr
= copy_rtx (addr
);
8425 XSTR (tlsaddr
, 0) = ggc_strdup (tlsname
);
8430 /* Place addr into TOC constant pool. */
8431 sym
= force_const_mem (GET_MODE (tlsaddr
), tlsaddr
);
8433 /* Output the TOC entry and create the MEM referencing the value. */
8434 if (constant_pool_expr_p (XEXP (sym
, 0))
8435 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
8437 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
8438 mem
= gen_const_mem (Pmode
, tocref
);
8439 set_mem_alias_set (mem
, get_TOC_alias_set ());
8444 /* Use global-dynamic for local-dynamic. */
8445 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
8446 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
8448 /* Create new TOC reference for @m symbol. */
8449 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
8450 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
8451 strcpy (tlsname
, "*LCM");
8452 strcat (tlsname
, name
+ 3);
8453 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
8454 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
8455 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
8456 rtx modmem
= gen_const_mem (Pmode
, tocref
);
8457 set_mem_alias_set (modmem
, get_TOC_alias_set ());
8459 rtx modreg
= gen_reg_rtx (Pmode
);
8460 emit_insn (gen_rtx_SET (modreg
, modmem
));
8462 tmpreg
= gen_reg_rtx (Pmode
);
8463 emit_insn (gen_rtx_SET (tmpreg
, mem
));
8465 dest
= gen_reg_rtx (Pmode
);
8467 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
8469 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
8472 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8473 else if (TARGET_32BIT
)
8475 tlsreg
= gen_reg_rtx (SImode
);
8476 emit_insn (gen_tls_get_tpointer (tlsreg
));
8479 tlsreg
= gen_rtx_REG (DImode
, 13);
8481 /* Load the TOC value into temporary register. */
8482 tmpreg
= gen_reg_rtx (Pmode
);
8483 emit_insn (gen_rtx_SET (tmpreg
, mem
));
8484 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
8485 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
8487 /* Add TOC symbol value to TLS pointer. */
8488 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
8493 /* Passes the tls arg value for global dynamic and local dynamic
8494 emit_library_call_value in rs6000_legitimize_tls_address to
8495 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8496 marker relocs put on __tls_get_addr calls. */
8497 static rtx global_tlsarg
;
8499 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8500 this (thread-local) address. */
8503 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
8508 return rs6000_legitimize_tls_address_aix (addr
, model
);
8510 dest
= gen_reg_rtx (Pmode
);
8511 if (model
== TLS_MODEL_LOCAL_EXEC
8512 && (rs6000_tls_size
== 16 || rs6000_pcrel_p (cfun
)))
8518 tlsreg
= gen_rtx_REG (Pmode
, 13);
8519 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
8523 tlsreg
= gen_rtx_REG (Pmode
, 2);
8524 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
8528 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
8532 tmp
= gen_reg_rtx (Pmode
);
8535 tlsreg
= gen_rtx_REG (Pmode
, 13);
8536 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
8540 tlsreg
= gen_rtx_REG (Pmode
, 2);
8541 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
8545 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
8547 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
8552 rtx got
, tga
, tmp1
, tmp2
;
8554 /* We currently use relocations like @got@tlsgd for tls, which
8555 means the linker will handle allocation of tls entries, placing
8556 them in the .got section. So use a pointer to the .got section,
8557 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8558 or to secondary GOT sections used by 32-bit -fPIC. */
8559 if (rs6000_pcrel_p (cfun
))
8561 else if (TARGET_64BIT
)
8562 got
= gen_rtx_REG (Pmode
, 2);
8566 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
8569 rtx gsym
= rs6000_got_sym ();
8570 got
= gen_reg_rtx (Pmode
);
8572 rs6000_emit_move (got
, gsym
, Pmode
);
8577 tmp1
= gen_reg_rtx (Pmode
);
8578 tmp2
= gen_reg_rtx (Pmode
);
8579 mem
= gen_const_mem (Pmode
, tmp1
);
8580 lab
= gen_label_rtx ();
8581 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
8582 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
8583 if (TARGET_LINK_STACK
)
8584 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
8585 emit_move_insn (tmp2
, mem
);
8586 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
8587 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
8592 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
8594 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addr
, got
),
8596 tga
= rs6000_tls_get_addr ();
8597 rtx argreg
= gen_rtx_REG (Pmode
, 3);
8598 emit_insn (gen_rtx_SET (argreg
, arg
));
8599 global_tlsarg
= arg
;
8600 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
, argreg
, Pmode
);
8601 global_tlsarg
= NULL_RTX
;
8603 /* Make a note so that the result of this call can be CSEd. */
8604 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
8605 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
8606 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
8608 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
8610 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, got
), UNSPEC_TLSLD
);
8611 tga
= rs6000_tls_get_addr ();
8612 tmp1
= gen_reg_rtx (Pmode
);
8613 rtx argreg
= gen_rtx_REG (Pmode
, 3);
8614 emit_insn (gen_rtx_SET (argreg
, arg
));
8615 global_tlsarg
= arg
;
8616 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
, argreg
, Pmode
);
8617 global_tlsarg
= NULL_RTX
;
8619 /* Make a note so that the result of this call can be CSEd. */
8620 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
8621 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
8622 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
8624 if (rs6000_tls_size
== 16 || rs6000_pcrel_p (cfun
))
8627 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
8629 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
8631 else if (rs6000_tls_size
== 32)
8633 tmp2
= gen_reg_rtx (Pmode
);
8635 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
8637 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
8640 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
8642 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
8646 tmp2
= gen_reg_rtx (Pmode
);
8648 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
8650 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
8652 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
8658 /* IE, or 64-bit offset LE. */
8659 tmp2
= gen_reg_rtx (Pmode
);
8661 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
8663 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
8665 if (rs6000_pcrel_p (cfun
))
8668 insn
= gen_tls_tls_pcrel_64 (dest
, tmp2
, addr
);
8670 insn
= gen_tls_tls_pcrel_32 (dest
, tmp2
, addr
);
8672 else if (TARGET_64BIT
)
8673 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
8675 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
8683 /* Only create the global variable for the stack protect guard if we are using
8684 the global flavor of that guard. */
8686 rs6000_init_stack_protect_guard (void)
8688 if (rs6000_stack_protector_guard
== SSP_GLOBAL
)
8689 return default_stack_protect_guard ();
8694 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8697 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8699 if (GET_CODE (x
) == HIGH
8700 && GET_CODE (XEXP (x
, 0)) == UNSPEC
)
8703 /* A TLS symbol in the TOC cannot contain a sum. */
8704 if (GET_CODE (x
) == CONST
8705 && GET_CODE (XEXP (x
, 0)) == PLUS
8706 && SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
8707 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0)) != 0)
8710 /* Do not place an ELF TLS symbol in the constant pool. */
8711 return TARGET_ELF
&& tls_referenced_p (x
);
8714 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8715 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8716 can be addressed relative to the toc pointer. */
8719 use_toc_relative_ref (rtx sym
, machine_mode mode
)
8721 return ((constant_pool_expr_p (sym
)
8722 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym
),
8723 get_pool_mode (sym
)))
8724 || (TARGET_CMODEL
== CMODEL_MEDIUM
8725 && SYMBOL_REF_LOCAL_P (sym
)
8726 && GET_MODE_SIZE (mode
) <= POWERPC64_TOC_POINTER_ALIGNMENT
));
8729 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8730 that is a valid memory address for an instruction.
8731 The MODE argument is the machine mode for the MEM expression
8732 that wants to use this address.
8734 On the RS/6000, there are four valid address: a SYMBOL_REF that
8735 refers to a constant pool entry of an address (or the sum of it
8736 plus a constant), a short (16-bit signed) constant plus a register,
8737 the sum of two registers, or a register indirect, possibly with an
8738 auto-increment. For DFmode, DDmode and DImode with a constant plus
8739 register, we must ensure that both words are addressable or PowerPC64
8740 with offset word aligned.
8742 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8743 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8744 because adjacent memory cells are accessed by adding word-sized offsets
8745 during assembly output. */
8747 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
)
8749 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
8750 bool quad_offset_p
= mode_supports_dq_form (mode
);
8752 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8753 if (VECTOR_MEM_ALTIVEC_P (mode
)
8754 && GET_CODE (x
) == AND
8755 && CONST_INT_P (XEXP (x
, 1))
8756 && INTVAL (XEXP (x
, 1)) == -16)
8759 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
8761 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
8764 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
8765 && mode_supports_pre_incdec_p (mode
)
8766 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
8769 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8770 if (address_is_prefixed (x
, mode
, NON_PREFIXED_DEFAULT
))
8773 /* Handle restricted vector d-form offsets in ISA 3.0. */
8776 if (quad_address_p (x
, mode
, reg_ok_strict
))
8779 else if (virtual_stack_registers_memory_p (x
))
8782 else if (reg_offset_p
)
8784 if (legitimate_small_data_p (mode
, x
))
8786 if (legitimate_constant_pool_address_p (x
, mode
,
8787 reg_ok_strict
|| lra_in_progress
))
8791 /* For TImode, if we have TImode in VSX registers, only allow register
8792 indirect addresses. This will allow the values to go in either GPRs
8793 or VSX registers without reloading. The vector types would tend to
8794 go into VSX registers, so we allow REG+REG, while TImode seems
8795 somewhat split, in that some uses are GPR based, and some VSX based. */
8796 /* FIXME: We could loosen this by changing the following to
8797 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8798 but currently we cannot allow REG+REG addressing for TImode. See
8799 PR72827 for complete details on how this ends up hoodwinking DSE. */
8800 if (mode
== TImode
&& TARGET_VSX
)
8802 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8805 && GET_CODE (x
) == PLUS
8806 && REG_P (XEXP (x
, 0))
8807 && (XEXP (x
, 0) == virtual_stack_vars_rtx
8808 || XEXP (x
, 0) == arg_pointer_rtx
)
8809 && CONST_INT_P (XEXP (x
, 1)))
8811 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
8813 if (!FLOAT128_2REG_P (mode
)
8814 && (TARGET_HARD_FLOAT
8816 || (mode
!= DFmode
&& mode
!= DDmode
))
8817 && (TARGET_POWERPC64
|| mode
!= DImode
)
8818 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
8820 && !avoiding_indexed_address_p (mode
)
8821 && legitimate_indexed_address_p (x
, reg_ok_strict
))
8823 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
8824 && mode_supports_pre_modify_p (mode
)
8825 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
8826 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
8827 reg_ok_strict
, false)
8828 || (!avoiding_indexed_address_p (mode
)
8829 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
8830 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8832 /* There is no prefixed version of the load/store with update. */
8833 rtx addr
= XEXP (x
, 1);
8834 return !address_is_prefixed (addr
, mode
, NON_PREFIXED_DEFAULT
);
8836 if (reg_offset_p
&& !quad_offset_p
8837 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
8842 /* Debug version of rs6000_legitimate_address_p. */
8844 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
,
8847 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
);
8849 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8850 "strict = %d, reload = %s, code = %s\n",
8851 ret
? "true" : "false",
8852 GET_MODE_NAME (mode
),
8854 (reload_completed
? "after" : "before"),
8855 GET_RTX_NAME (GET_CODE (x
)));
8861 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8864 rs6000_mode_dependent_address_p (const_rtx addr
,
8865 addr_space_t as ATTRIBUTE_UNUSED
)
8867 return rs6000_mode_dependent_address_ptr (addr
);
8870 /* Go to LABEL if ADDR (a legitimate address expression)
8871 has an effect that depends on the machine mode it is used for.
8873 On the RS/6000 this is true of all integral offsets (since AltiVec
8874 and VSX modes don't allow them) or is a pre-increment or decrement.
8876 ??? Except that due to conceptual problems in offsettable_address_p
8877 we can't really report the problems of integral offsets. So leave
8878 this assuming that the adjustable offset must be valid for the
8879 sub-words of a TFmode operand, which is what we had before. */
8882 rs6000_mode_dependent_address (const_rtx addr
)
8884 switch (GET_CODE (addr
))
8887 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8888 is considered a legitimate address before reload, so there
8889 are no offset restrictions in that case. Note that this
8890 condition is safe in strict mode because any address involving
8891 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8892 been rejected as illegitimate. */
8893 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
8894 && XEXP (addr
, 0) != arg_pointer_rtx
8895 && CONST_INT_P (XEXP (addr
, 1)))
8897 HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
8898 HOST_WIDE_INT extra
= TARGET_POWERPC64
? 8 : 12;
8899 if (TARGET_PREFIXED_ADDR
)
8900 return !SIGNED_34BIT_OFFSET_EXTRA_P (val
, extra
);
8902 return !SIGNED_16BIT_OFFSET_EXTRA_P (val
, extra
);
8907 /* Anything in the constant pool is sufficiently aligned that
8908 all bytes have the same high part address. */
8909 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
8911 /* Auto-increment cases are now treated generically in recog.c. */
8913 return TARGET_UPDATE
;
8915 /* AND is only allowed in Altivec loads. */
8926 /* Debug version of rs6000_mode_dependent_address. */
8928 rs6000_debug_mode_dependent_address (const_rtx addr
)
8930 bool ret
= rs6000_mode_dependent_address (addr
);
8932 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
8933 ret
? "true" : "false");
8939 /* Implement FIND_BASE_TERM. */
8942 rs6000_find_base_term (rtx op
)
8947 if (GET_CODE (base
) == CONST
)
8948 base
= XEXP (base
, 0);
8949 if (GET_CODE (base
) == PLUS
)
8950 base
= XEXP (base
, 0);
8951 if (GET_CODE (base
) == UNSPEC
)
8952 switch (XINT (base
, 1))
8955 case UNSPEC_MACHOPIC_OFFSET
:
8956 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8957 for aliasing purposes. */
8958 return XVECEXP (base
, 0, 0);
8964 /* More elaborate version of recog's offsettable_memref_p predicate
8965 that works around the ??? note of rs6000_mode_dependent_address.
8966 In particular it accepts
8968 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8970 in 32-bit mode, that the recog predicate rejects. */
8973 rs6000_offsettable_memref_p (rtx op
, machine_mode reg_mode
, bool strict
)
8980 /* First mimic offsettable_memref_p. */
8981 if (offsettable_address_p (strict
, GET_MODE (op
), XEXP (op
, 0)))
8984 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8985 the latter predicate knows nothing about the mode of the memory
8986 reference and, therefore, assumes that it is the largest supported
8987 mode (TFmode). As a consequence, legitimate offsettable memory
8988 references are rejected. rs6000_legitimate_offset_address_p contains
8989 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8990 at least with a little bit of help here given that we know the
8991 actual registers used. */
8992 worst_case
= ((TARGET_POWERPC64
&& GET_MODE_CLASS (reg_mode
) == MODE_INT
)
8993 || GET_MODE_SIZE (reg_mode
) == 4);
8994 return rs6000_legitimate_offset_address_p (GET_MODE (op
), XEXP (op
, 0),
8995 strict
, worst_case
);
8998 /* Determine the reassociation width to be used in reassociate_bb.
8999 This takes into account how many parallel operations we
9000 can actually do of a given type, and also the latency.
9004 vect add/sub/mul 2/cycle
9005 fp add/sub/mul 2/cycle
9010 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
9013 switch (rs6000_tune
)
9015 case PROCESSOR_POWER8
:
9016 case PROCESSOR_POWER9
:
9017 case PROCESSOR_FUTURE
:
9018 if (DECIMAL_FLOAT_MODE_P (mode
))
9020 if (VECTOR_MODE_P (mode
))
9022 if (INTEGRAL_MODE_P (mode
))
9024 if (FLOAT_MODE_P (mode
))
9033 /* Change register usage conditional on target flags. */
9035 rs6000_conditional_register_usage (void)
9039 if (TARGET_DEBUG_TARGET
)
9040 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
9042 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9044 fixed_regs
[13] = call_used_regs
[13] = 1;
9046 /* Conditionally disable FPRs. */
9047 if (TARGET_SOFT_FLOAT
)
9048 for (i
= 32; i
< 64; i
++)
9049 fixed_regs
[i
] = call_used_regs
[i
] = 1;
9051 /* The TOC register is not killed across calls in a way that is
9052 visible to the compiler. */
9053 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
9054 call_used_regs
[2] = 0;
9056 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
9057 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
9059 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
9060 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
9061 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
9063 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
9064 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
9065 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
9067 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
9068 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
9070 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
9072 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
9073 fixed_regs
[i
] = call_used_regs
[i
] = 1;
9074 call_used_regs
[VRSAVE_REGNO
] = 1;
9077 if (TARGET_ALTIVEC
|| TARGET_VSX
)
9078 global_regs
[VSCR_REGNO
] = 1;
9080 if (TARGET_ALTIVEC_ABI
)
9082 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
9083 call_used_regs
[i
] = 1;
9085 /* AIX reserves VR20:31 in non-extended ABI mode. */
9087 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
9088 fixed_regs
[i
] = call_used_regs
[i
] = 1;
9093 /* Output insns to set DEST equal to the constant SOURCE as a series of
9094 lis, ori and shl instructions and return TRUE. */
9097 rs6000_emit_set_const (rtx dest
, rtx source
)
9099 machine_mode mode
= GET_MODE (dest
);
9104 gcc_checking_assert (CONST_INT_P (source
));
9105 c
= INTVAL (source
);
9110 emit_insn (gen_rtx_SET (dest
, source
));
9114 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
9116 emit_insn (gen_rtx_SET (copy_rtx (temp
),
9117 GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
9118 emit_insn (gen_rtx_SET (dest
,
9119 gen_rtx_IOR (SImode
, copy_rtx (temp
),
9120 GEN_INT (c
& 0xffff))));
9124 if (!TARGET_POWERPC64
)
9128 hi
= operand_subword_force (copy_rtx (dest
), WORDS_BIG_ENDIAN
== 0,
9130 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0,
9132 emit_move_insn (hi
, GEN_INT (c
>> 32));
9133 c
= ((c
& 0xffffffff) ^ 0x80000000) - 0x80000000;
9134 emit_move_insn (lo
, GEN_INT (c
));
9137 rs6000_emit_set_long_const (dest
, c
);
9144 insn
= get_last_insn ();
9145 set
= single_set (insn
);
9146 if (! CONSTANT_P (SET_SRC (set
)))
9147 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
9152 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9153 Output insns to set DEST equal to the constant C as a series of
9154 lis, ori and shl instructions. */
9157 rs6000_emit_set_long_const (rtx dest
, HOST_WIDE_INT c
)
9160 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
9170 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
9171 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && ! (ud1
& 0x8000)))
9172 emit_move_insn (dest
, GEN_INT ((ud1
^ 0x8000) - 0x8000));
9174 else if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
9175 || (ud4
== 0 && ud3
== 0 && ! (ud2
& 0x8000)))
9177 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
9179 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
9180 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
9182 emit_move_insn (dest
,
9183 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9186 else if (ud3
== 0 && ud4
== 0)
9188 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
9190 gcc_assert (ud2
& 0x8000);
9191 emit_move_insn (copy_rtx (temp
),
9192 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
9194 emit_move_insn (copy_rtx (temp
),
9195 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9197 emit_move_insn (dest
,
9198 gen_rtx_ZERO_EXTEND (DImode
,
9199 gen_lowpart (SImode
,
9202 else if ((ud4
== 0xffff && (ud3
& 0x8000))
9203 || (ud4
== 0 && ! (ud3
& 0x8000)))
9205 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
9207 emit_move_insn (copy_rtx (temp
),
9208 GEN_INT (((ud3
<< 16) ^ 0x80000000) - 0x80000000));
9210 emit_move_insn (copy_rtx (temp
),
9211 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9213 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
9214 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
9217 emit_move_insn (dest
,
9218 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9223 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
9225 emit_move_insn (copy_rtx (temp
),
9226 GEN_INT (((ud4
<< 16) ^ 0x80000000) - 0x80000000));
9228 emit_move_insn (copy_rtx (temp
),
9229 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9232 emit_move_insn (ud2
!= 0 || ud1
!= 0 ? copy_rtx (temp
) : dest
,
9233 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
9236 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
9237 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9238 GEN_INT (ud2
<< 16)));
9240 emit_move_insn (dest
,
9241 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9246 /* Helper for the following. Get rid of [r+r] memory refs
9247 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9250 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
9252 if (MEM_P (operands
[0])
9253 && !REG_P (XEXP (operands
[0], 0))
9254 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
9255 GET_MODE (operands
[0]), false))
9257 = replace_equiv_address (operands
[0],
9258 copy_addr_to_reg (XEXP (operands
[0], 0)));
9260 if (MEM_P (operands
[1])
9261 && !REG_P (XEXP (operands
[1], 0))
9262 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
9263 GET_MODE (operands
[1]), false))
9265 = replace_equiv_address (operands
[1],
9266 copy_addr_to_reg (XEXP (operands
[1], 0)));
9269 /* Generate a vector of constants to permute MODE for a little-endian
9270 storage operation by swapping the two halves of a vector. */
9272 rs6000_const_vec (machine_mode mode
)
9300 v
= rtvec_alloc (subparts
);
9302 for (i
= 0; i
< subparts
/ 2; ++i
)
9303 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
9304 for (i
= subparts
/ 2; i
< subparts
; ++i
)
9305 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
9310 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9313 rs6000_emit_le_vsx_permute (rtx dest
, rtx source
, machine_mode mode
)
9315 /* Scalar permutations are easier to express in integer modes rather than
9316 floating-point modes, so cast them here. We use V1TImode instead
9317 of TImode to ensure that the values don't go through GPRs. */
9318 if (FLOAT128_VECTOR_P (mode
))
9320 dest
= gen_lowpart (V1TImode
, dest
);
9321 source
= gen_lowpart (V1TImode
, source
);
9325 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9327 if (mode
== TImode
|| mode
== V1TImode
)
9328 emit_insn (gen_rtx_SET (dest
, gen_rtx_ROTATE (mode
, source
,
9332 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
9333 emit_insn (gen_rtx_SET (dest
, gen_rtx_VEC_SELECT (mode
, source
, par
)));
9337 /* Emit a little-endian load from vector memory location SOURCE to VSX
9338 register DEST in mode MODE. The load is done with two permuting
9339 insn's that represent an lxvd2x and xxpermdi. */
9341 rs6000_emit_le_vsx_load (rtx dest
, rtx source
, machine_mode mode
)
9343 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
9345 if (mode
== TImode
|| mode
== V1TImode
)
9348 dest
= gen_lowpart (V2DImode
, dest
);
9349 source
= adjust_address (source
, V2DImode
, 0);
9352 rtx tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
9353 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
9354 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
9357 /* Emit a little-endian store to vector memory location DEST from VSX
9358 register SOURCE in mode MODE. The store is done with two permuting
9359 insn's that represent an xxpermdi and an stxvd2x. */
9361 rs6000_emit_le_vsx_store (rtx dest
, rtx source
, machine_mode mode
)
9363 /* This should never be called during or after LRA, because it does
9364 not re-permute the source register. It is intended only for use
9366 gcc_assert (!lra_in_progress
&& !reload_completed
);
9368 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9370 if (mode
== TImode
|| mode
== V1TImode
)
9373 dest
= adjust_address (dest
, V2DImode
, 0);
9374 source
= gen_lowpart (V2DImode
, source
);
9377 rtx tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source
) : source
;
9378 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
9379 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
9382 /* Emit a sequence representing a little-endian VSX load or store,
9383 moving data from SOURCE to DEST in mode MODE. This is done
9384 separately from rs6000_emit_move to ensure it is called only
9385 during expand. LE VSX loads and stores introduced later are
9386 handled with a split. The expand-time RTL generation allows
9387 us to optimize away redundant pairs of register-permutes. */
9389 rs6000_emit_le_vsx_move (rtx dest
, rtx source
, machine_mode mode
)
9391 gcc_assert (!BYTES_BIG_ENDIAN
9392 && VECTOR_MEM_VSX_P (mode
)
9393 && !TARGET_P9_VECTOR
9394 && !gpr_or_gpr_p (dest
, source
)
9395 && (MEM_P (source
) ^ MEM_P (dest
)));
9399 gcc_assert (REG_P (dest
) || SUBREG_P (dest
));
9400 rs6000_emit_le_vsx_load (dest
, source
, mode
);
9404 if (!REG_P (source
))
9405 source
= force_reg (mode
, source
);
9406 rs6000_emit_le_vsx_store (dest
, source
, mode
);
9410 /* Return whether a SFmode or SImode move can be done without converting one
9411 mode to another. This arrises when we have:
9413 (SUBREG:SF (REG:SI ...))
9414 (SUBREG:SI (REG:SF ...))
9416 and one of the values is in a floating point/vector register, where SFmode
9417 scalars are stored in DFmode format. */
9420 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
9422 if (TARGET_ALLOW_SF_SUBREG
)
9425 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
9428 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
9431 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9432 if (SUBREG_P (dest
))
9434 rtx dest_subreg
= SUBREG_REG (dest
);
9435 rtx src_subreg
= SUBREG_REG (src
);
9436 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
9443 /* Helper function to change moves with:
9445 (SUBREG:SF (REG:SI)) and
9446 (SUBREG:SI (REG:SF))
9448 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9449 values are stored as DFmode values in the VSX registers. We need to convert
9450 the bits before we can use a direct move or operate on the bits in the
9451 vector register as an integer type.
9453 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
9456 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
9458 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_completed
9459 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
9460 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
9462 rtx inner_source
= SUBREG_REG (source
);
9463 machine_mode inner_mode
= GET_MODE (inner_source
);
9465 if (mode
== SImode
&& inner_mode
== SFmode
)
9467 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
9471 if (mode
== SFmode
&& inner_mode
== SImode
)
9473 emit_insn (gen_movsf_from_si (dest
, inner_source
));
9481 /* Emit a move from SOURCE to DEST in mode MODE. */
9483 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
9487 operands
[1] = source
;
9489 if (TARGET_DEBUG_ADDR
)
9492 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9493 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9494 GET_MODE_NAME (mode
),
9497 can_create_pseudo_p ());
9499 fprintf (stderr
, "source:\n");
9503 /* Check that we get CONST_WIDE_INT only when we should. */
9504 if (CONST_WIDE_INT_P (operands
[1])
9505 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
9508 #ifdef HAVE_AS_GNU_ATTRIBUTE
9509 /* If we use a long double type, set the flags in .gnu_attribute that say
9510 what the long double type is. This is to allow the linker's warning
9511 message for the wrong long double to be useful, even if the function does
9512 not do a call (for example, doing a 128-bit add on power9 if the long
9513 double type is IEEE 128-bit. Do not set this if __ibm128 or __floa128 are
9514 used if they aren't the default long dobule type. */
9515 if (rs6000_gnu_attr
&& (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
))
9517 if (TARGET_LONG_DOUBLE_128
&& (mode
== TFmode
|| mode
== TCmode
))
9518 rs6000_passes_float
= rs6000_passes_long_double
= true;
9520 else if (!TARGET_LONG_DOUBLE_128
&& (mode
== DFmode
|| mode
== DCmode
))
9521 rs6000_passes_float
= rs6000_passes_long_double
= true;
9525 /* See if we need to special case SImode/SFmode SUBREG moves. */
9526 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
9527 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
9530 /* Check if GCC is setting up a block move that will end up using FP
9531 registers as temporaries. We must make sure this is acceptable. */
9532 if (MEM_P (operands
[0])
9533 && MEM_P (operands
[1])
9535 && (rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[0]))
9536 || rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[1])))
9537 && ! (rs6000_slow_unaligned_access (SImode
,
9538 (MEM_ALIGN (operands
[0]) > 32
9539 ? 32 : MEM_ALIGN (operands
[0])))
9540 || rs6000_slow_unaligned_access (SImode
,
9541 (MEM_ALIGN (operands
[1]) > 32
9542 ? 32 : MEM_ALIGN (operands
[1]))))
9543 && ! MEM_VOLATILE_P (operands
[0])
9544 && ! MEM_VOLATILE_P (operands
[1]))
9546 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
9547 adjust_address (operands
[1], SImode
, 0));
9548 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
9549 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
9553 if (can_create_pseudo_p () && MEM_P (operands
[0])
9554 && !gpc_reg_operand (operands
[1], mode
))
9555 operands
[1] = force_reg (mode
, operands
[1]);
9557 /* Recognize the case where operand[1] is a reference to thread-local
9558 data and load its address to a register. */
9559 if (tls_referenced_p (operands
[1]))
9561 enum tls_model model
;
9562 rtx tmp
= operands
[1];
9565 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
9567 addend
= XEXP (XEXP (tmp
, 0), 1);
9568 tmp
= XEXP (XEXP (tmp
, 0), 0);
9571 gcc_assert (SYMBOL_REF_P (tmp
));
9572 model
= SYMBOL_REF_TLS_MODEL (tmp
);
9573 gcc_assert (model
!= 0);
9575 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
9578 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
9579 tmp
= force_operand (tmp
, operands
[0]);
9584 /* 128-bit constant floating-point values on Darwin should really be loaded
9585 as two parts. However, this premature splitting is a problem when DFmode
9586 values can go into Altivec registers. */
9587 if (TARGET_MACHO
&& CONST_DOUBLE_P (operands
[1]) && FLOAT128_IBM_P (mode
)
9588 && !reg_addr
[DFmode
].scalar_in_vmx_p
)
9590 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
9591 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
9593 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
9594 GET_MODE_SIZE (DFmode
)),
9595 simplify_gen_subreg (DFmode
, operands
[1], mode
,
9596 GET_MODE_SIZE (DFmode
)),
9601 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9602 p1:SD) if p1 is not of floating point class and p0 is spilled as
9603 we can have no analogous movsd_store for this. */
9604 if (lra_in_progress
&& mode
== DDmode
9605 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
9606 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
9607 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
9608 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
9611 int regno
= REGNO (SUBREG_REG (operands
[1]));
9613 if (!HARD_REGISTER_NUM_P (regno
))
9615 cl
= reg_preferred_class (regno
);
9616 regno
= reg_renumber
[regno
];
9618 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
9620 if (regno
>= 0 && ! FP_REGNO_P (regno
))
9623 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
9624 operands
[1] = SUBREG_REG (operands
[1]);
9629 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
9630 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
9631 && (REG_P (operands
[1])
9632 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
9634 int regno
= reg_or_subregno (operands
[1]);
9637 if (!HARD_REGISTER_NUM_P (regno
))
9639 cl
= reg_preferred_class (regno
);
9640 gcc_assert (cl
!= NO_REGS
);
9641 regno
= reg_renumber
[regno
];
9643 regno
= ira_class_hard_regs
[cl
][0];
9645 if (FP_REGNO_P (regno
))
9647 if (GET_MODE (operands
[0]) != DDmode
)
9648 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
9649 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
9651 else if (INT_REGNO_P (regno
))
9652 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
9657 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9658 p:DD)) if p0 is not of floating point class and p1 is spilled as
9659 we can have no analogous movsd_load for this. */
9660 if (lra_in_progress
&& mode
== DDmode
9661 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
9662 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
9663 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
9664 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
9667 int regno
= REGNO (SUBREG_REG (operands
[0]));
9669 if (!HARD_REGISTER_NUM_P (regno
))
9671 cl
= reg_preferred_class (regno
);
9672 regno
= reg_renumber
[regno
];
9674 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
9676 if (regno
>= 0 && ! FP_REGNO_P (regno
))
9679 operands
[0] = SUBREG_REG (operands
[0]);
9680 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
9685 && (REG_P (operands
[0])
9686 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
9687 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
9688 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
9690 int regno
= reg_or_subregno (operands
[0]);
9693 if (!HARD_REGISTER_NUM_P (regno
))
9695 cl
= reg_preferred_class (regno
);
9696 gcc_assert (cl
!= NO_REGS
);
9697 regno
= reg_renumber
[regno
];
9699 regno
= ira_class_hard_regs
[cl
][0];
9701 if (FP_REGNO_P (regno
))
9703 if (GET_MODE (operands
[1]) != DDmode
)
9704 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
9705 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
9707 else if (INT_REGNO_P (regno
))
9708 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
9714 /* FIXME: In the long term, this switch statement should go away
9715 and be replaced by a sequence of tests based on things like
9721 if (CONSTANT_P (operands
[1])
9722 && !CONST_INT_P (operands
[1]))
9723 operands
[1] = force_const_mem (mode
, operands
[1]);
9730 if (FLOAT128_2REG_P (mode
))
9731 rs6000_eliminate_indexed_memrefs (operands
);
9738 if (CONSTANT_P (operands
[1])
9739 && ! easy_fp_constant (operands
[1], mode
))
9740 operands
[1] = force_const_mem (mode
, operands
[1]);
9750 if (CONSTANT_P (operands
[1])
9751 && !easy_vector_constant (operands
[1], mode
))
9752 operands
[1] = force_const_mem (mode
, operands
[1]);
9757 /* Use default pattern for address of ELF small data */
9760 && DEFAULT_ABI
== ABI_V4
9761 && (SYMBOL_REF_P (operands
[1])
9762 || GET_CODE (operands
[1]) == CONST
)
9763 && small_data_operand (operands
[1], mode
))
9765 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
9769 /* Use the default pattern for loading up PC-relative addresses. */
9770 if (TARGET_PCREL
&& mode
== Pmode
9771 && pcrel_local_or_external_address (operands
[1], Pmode
))
9773 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
9777 if (DEFAULT_ABI
== ABI_V4
9778 && mode
== Pmode
&& mode
== SImode
9779 && flag_pic
== 1 && got_operand (operands
[1], mode
))
9781 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
9785 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
9786 && TARGET_NO_TOC_OR_PCREL
9789 && CONSTANT_P (operands
[1])
9790 && GET_CODE (operands
[1]) != HIGH
9791 && !CONST_INT_P (operands
[1]))
9793 rtx target
= (!can_create_pseudo_p ()
9795 : gen_reg_rtx (mode
));
9797 /* If this is a function address on -mcall-aixdesc,
9798 convert it to the address of the descriptor. */
9799 if (DEFAULT_ABI
== ABI_AIX
9800 && SYMBOL_REF_P (operands
[1])
9801 && XSTR (operands
[1], 0)[0] == '.')
9803 const char *name
= XSTR (operands
[1], 0);
9805 while (*name
== '.')
9807 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
9808 CONSTANT_POOL_ADDRESS_P (new_ref
)
9809 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
9810 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
9811 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
9812 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
9813 operands
[1] = new_ref
;
9816 if (DEFAULT_ABI
== ABI_DARWIN
)
9819 /* This is not PIC code, but could require the subset of
9820 indirections used by mdynamic-no-pic. */
9821 if (MACHO_DYNAMIC_NO_PIC_P
)
9823 /* Take care of any required data indirection. */
9824 operands
[1] = rs6000_machopic_legitimize_pic_address (
9825 operands
[1], mode
, operands
[0]);
9826 if (operands
[0] != operands
[1])
9827 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
9831 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
9832 emit_insn (gen_macho_low (Pmode
, operands
[0],
9833 target
, operands
[1]));
9837 emit_insn (gen_elf_high (target
, operands
[1]));
9838 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
9842 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9843 and we have put it in the TOC, we just need to make a TOC-relative
9846 && SYMBOL_REF_P (operands
[1])
9847 && use_toc_relative_ref (operands
[1], mode
))
9848 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
9849 else if (mode
== Pmode
9850 && CONSTANT_P (operands
[1])
9851 && GET_CODE (operands
[1]) != HIGH
9852 && ((REG_P (operands
[0])
9853 && FP_REGNO_P (REGNO (operands
[0])))
9854 || !CONST_INT_P (operands
[1])
9855 || (num_insns_constant (operands
[1], mode
)
9856 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
9857 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
9858 && (TARGET_CMODEL
== CMODEL_SMALL
9859 || can_create_pseudo_p ()
9860 || (REG_P (operands
[0])
9861 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
9865 /* Darwin uses a special PIC legitimizer. */
9866 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
9869 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
9871 if (operands
[0] != operands
[1])
9872 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
9877 /* If we are to limit the number of things we put in the TOC and
9878 this is a symbol plus a constant we can add in one insn,
9879 just put the symbol in the TOC and add the constant. */
9880 if (GET_CODE (operands
[1]) == CONST
9881 && TARGET_NO_SUM_IN_TOC
9882 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
9883 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
9884 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
9885 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
9886 && ! side_effects_p (operands
[0]))
9889 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
9890 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
9892 sym
= force_reg (mode
, sym
);
9893 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
9897 operands
[1] = force_const_mem (mode
, operands
[1]);
9900 && SYMBOL_REF_P (XEXP (operands
[1], 0))
9901 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
9903 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
9905 operands
[1] = gen_const_mem (mode
, tocref
);
9906 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
9912 if (!VECTOR_MEM_VSX_P (TImode
))
9913 rs6000_eliminate_indexed_memrefs (operands
);
9917 rs6000_eliminate_indexed_memrefs (operands
);
9921 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
9924 /* Above, we may have called force_const_mem which may have returned
9925 an invalid address. If we can, fix this up; otherwise, reload will
9926 have to deal with it. */
9927 if (MEM_P (operands
[1]))
9928 operands
[1] = validize_mem (operands
[1]);
9930 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
9934 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
9936 init_float128_ibm (machine_mode mode
)
9938 if (!TARGET_XL_COMPAT
)
9940 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
9941 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
9942 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
9943 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
9945 if (!TARGET_HARD_FLOAT
)
9947 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
9948 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
9949 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
9950 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
9951 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
9952 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
9953 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
9954 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
9956 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
9957 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
9958 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
9959 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
9960 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
9961 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
9962 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
9963 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
9968 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
9969 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
9970 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
9971 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
9974 /* Add various conversions for IFmode to use the traditional TFmode
9978 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf");
9979 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf");
9980 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdtf");
9981 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd");
9982 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd");
9983 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtftd");
9985 if (TARGET_POWERPC64
)
9987 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
9988 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
9989 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
9990 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
9995 /* Create a decl for either complex long double multiply or complex long double
9996 divide when long double is IEEE 128-bit floating point. We can't use
9997 __multc3 and __divtc3 because the original long double using IBM extended
9998 double used those names. The complex multiply/divide functions are encoded
9999 as builtin functions with a complex result and 4 scalar inputs. */
10002 create_complex_muldiv (const char *name
, built_in_function fncode
, tree fntype
)
10004 tree fndecl
= add_builtin_function (name
, fntype
, fncode
, BUILT_IN_NORMAL
,
10007 set_builtin_decl (fncode
, fndecl
, true);
10009 if (TARGET_DEBUG_BUILTIN
)
10010 fprintf (stderr
, "create complex %s, fncode: %d\n", name
, (int) fncode
);
10015 /* Set up IEEE 128-bit floating point routines. Use different names if the
10016 arguments can be passed in a vector register. The historical PowerPC
10017 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10018 continue to use that if we aren't using vector registers to pass IEEE
10019 128-bit floating point. */
10022 init_float128_ieee (machine_mode mode
)
10024 if (FLOAT128_VECTOR_P (mode
))
10026 static bool complex_muldiv_init_p
= false;
10028 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10029 we have clone or target attributes, this will be called a second
10030 time. We want to create the built-in function only once. */
10031 if (mode
== TFmode
&& TARGET_IEEEQUAD
&& !complex_muldiv_init_p
)
10033 complex_muldiv_init_p
= true;
10034 built_in_function fncode_mul
=
10035 (built_in_function
) (BUILT_IN_COMPLEX_MUL_MIN
+ TCmode
10036 - MIN_MODE_COMPLEX_FLOAT
);
10037 built_in_function fncode_div
=
10038 (built_in_function
) (BUILT_IN_COMPLEX_DIV_MIN
+ TCmode
10039 - MIN_MODE_COMPLEX_FLOAT
);
10041 tree fntype
= build_function_type_list (complex_long_double_type_node
,
10042 long_double_type_node
,
10043 long_double_type_node
,
10044 long_double_type_node
,
10045 long_double_type_node
,
10048 create_complex_muldiv ("__mulkc3", fncode_mul
, fntype
);
10049 create_complex_muldiv ("__divkc3", fncode_div
, fntype
);
10052 set_optab_libfunc (add_optab
, mode
, "__addkf3");
10053 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
10054 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
10055 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
10056 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
10057 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
10058 set_optab_libfunc (abs_optab
, mode
, "__abskf2");
10059 set_optab_libfunc (powi_optab
, mode
, "__powikf2");
10061 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
10062 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
10063 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
10064 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
10065 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
10066 set_optab_libfunc (le_optab
, mode
, "__lekf2");
10067 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
10069 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
10070 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
10071 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
10072 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
10074 set_conv_libfunc (sext_optab
, mode
, IFmode
, "__trunctfkf2");
10075 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
10076 set_conv_libfunc (sext_optab
, mode
, TFmode
, "__trunctfkf2");
10078 set_conv_libfunc (trunc_optab
, IFmode
, mode
, "__extendkftf2");
10079 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
10080 set_conv_libfunc (trunc_optab
, TFmode
, mode
, "__extendkftf2");
10082 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf");
10083 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf");
10084 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdkf");
10085 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd");
10086 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd");
10087 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendkftd");
10089 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
10090 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
10091 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
10092 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
10094 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
10095 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
10096 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
10097 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
10099 if (TARGET_POWERPC64
)
10101 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti");
10102 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti");
10103 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf");
10104 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf");
10110 set_optab_libfunc (add_optab
, mode
, "_q_add");
10111 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
10112 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
10113 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
10114 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
10115 if (TARGET_PPC_GPOPT
)
10116 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
10118 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
10119 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
10120 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
10121 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
10122 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
10123 set_optab_libfunc (le_optab
, mode
, "_q_fle");
10125 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
10126 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
10127 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
10128 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
10129 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
10130 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
10131 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
10132 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
10137 rs6000_init_libfuncs (void)
10139 /* __float128 support. */
10140 if (TARGET_FLOAT128_TYPE
)
10142 init_float128_ibm (IFmode
);
10143 init_float128_ieee (KFmode
);
10146 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10147 if (TARGET_LONG_DOUBLE_128
)
10149 if (!TARGET_IEEEQUAD
)
10150 init_float128_ibm (TFmode
);
10152 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10154 init_float128_ieee (TFmode
);
10158 /* Emit a potentially record-form instruction, setting DST from SRC.
10159 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10160 signed comparison of DST with zero. If DOT is 1, the generated RTL
10161 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10162 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10163 a separate COMPARE. */
10166 rs6000_emit_dot_insn (rtx dst
, rtx src
, int dot
, rtx ccreg
)
10170 emit_move_insn (dst
, src
);
10174 if (cc_reg_not_cr0_operand (ccreg
, CCmode
))
10176 emit_move_insn (dst
, src
);
10177 emit_move_insn (ccreg
, gen_rtx_COMPARE (CCmode
, dst
, const0_rtx
));
10181 rtx ccset
= gen_rtx_SET (ccreg
, gen_rtx_COMPARE (CCmode
, src
, const0_rtx
));
10184 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, dst
);
10185 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, clobber
)));
10189 rtx set
= gen_rtx_SET (dst
, src
);
10190 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, set
)));
10195 /* A validation routine: say whether CODE, a condition code, and MODE
10196 match. The other alternatives either don't make sense or should
10197 never be generated. */
10200 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
10202 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
10203 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
10204 && GET_MODE_CLASS (mode
) == MODE_CC
);
10206 /* These don't make sense. */
10207 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
10208 || mode
!= CCUNSmode
);
10210 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
10211 || mode
== CCUNSmode
);
10213 gcc_assert (mode
== CCFPmode
10214 || (code
!= ORDERED
&& code
!= UNORDERED
10215 && code
!= UNEQ
&& code
!= LTGT
10216 && code
!= UNGT
&& code
!= UNLT
10217 && code
!= UNGE
&& code
!= UNLE
));
10219 /* These are invalid; the information is not there. */
10220 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
10224 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10225 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10226 not zero, store there the bit offset (counted from the right) where
10227 the single stretch of 1 bits begins; and similarly for B, the bit
10228 offset where it ends. */
10231 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
10233 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
10234 unsigned HOST_WIDE_INT bit
;
10236 int n
= GET_MODE_PRECISION (mode
);
10238 if (mode
!= DImode
&& mode
!= SImode
)
10241 if (INTVAL (mask
) >= 0)
10244 ne
= exact_log2 (bit
);
10245 nb
= exact_log2 (val
+ bit
);
10247 else if (val
+ 1 == 0)
10256 nb
= exact_log2 (bit
);
10257 ne
= exact_log2 (val
+ bit
);
10262 ne
= exact_log2 (bit
);
10263 if (val
+ bit
== 0)
10271 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
10282 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10283 or rldicr instruction, to implement an AND with it in mode MODE. */
10286 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
10290 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
10293 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10295 if (mode
== DImode
)
10296 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
10298 /* For SImode, rlwinm can do everything. */
10299 if (mode
== SImode
)
10300 return (nb
< 32 && ne
< 32);
10305 /* Return the instruction template for an AND with mask in mode MODE, with
10306 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10309 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
10313 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
10314 gcc_unreachable ();
10316 if (mode
== DImode
&& ne
== 0)
10318 operands
[3] = GEN_INT (63 - nb
);
10320 return "rldicl. %0,%1,0,%3";
10321 return "rldicl %0,%1,0,%3";
10324 if (mode
== DImode
&& nb
== 63)
10326 operands
[3] = GEN_INT (63 - ne
);
10328 return "rldicr. %0,%1,0,%3";
10329 return "rldicr %0,%1,0,%3";
10332 if (nb
< 32 && ne
< 32)
10334 operands
[3] = GEN_INT (31 - nb
);
10335 operands
[4] = GEN_INT (31 - ne
);
10337 return "rlwinm. %0,%1,0,%3,%4";
10338 return "rlwinm %0,%1,0,%3,%4";
10341 gcc_unreachable ();
10344 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10345 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10346 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10349 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
10353 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
10356 int n
= GET_MODE_PRECISION (mode
);
10359 if (CONST_INT_P (XEXP (shift
, 1)))
10361 sh
= INTVAL (XEXP (shift
, 1));
10362 if (sh
< 0 || sh
>= n
)
10366 rtx_code code
= GET_CODE (shift
);
10368 /* Convert any shift by 0 to a rotate, to simplify below code. */
10372 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10373 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
10375 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
10381 /* DImode rotates need rld*. */
10382 if (mode
== DImode
&& code
== ROTATE
)
10383 return (nb
== 63 || ne
== 0 || ne
== sh
);
10385 /* SImode rotates need rlw*. */
10386 if (mode
== SImode
&& code
== ROTATE
)
10387 return (nb
< 32 && ne
< 32 && sh
< 32);
10389 /* Wrap-around masks are only okay for rotates. */
10393 /* Variable shifts are only okay for rotates. */
10397 /* Don't allow ASHIFT if the mask is wrong for that. */
10398 if (code
== ASHIFT
&& ne
< sh
)
10401 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10402 if the mask is wrong for that. */
10403 if (nb
< 32 && ne
< 32 && sh
< 32
10404 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
10407 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10408 if the mask is wrong for that. */
10409 if (code
== LSHIFTRT
)
10411 if (nb
== 63 || ne
== 0 || ne
== sh
)
10412 return !(code
== LSHIFTRT
&& nb
>= sh
);
10417 /* Return the instruction template for a shift with mask in mode MODE, with
10418 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10421 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
10425 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
10426 gcc_unreachable ();
10428 if (mode
== DImode
&& ne
== 0)
10430 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
10431 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
10432 operands
[3] = GEN_INT (63 - nb
);
10434 return "rld%I2cl. %0,%1,%2,%3";
10435 return "rld%I2cl %0,%1,%2,%3";
10438 if (mode
== DImode
&& nb
== 63)
10440 operands
[3] = GEN_INT (63 - ne
);
10442 return "rld%I2cr. %0,%1,%2,%3";
10443 return "rld%I2cr %0,%1,%2,%3";
10447 && GET_CODE (operands
[4]) != LSHIFTRT
10448 && CONST_INT_P (operands
[2])
10449 && ne
== INTVAL (operands
[2]))
10451 operands
[3] = GEN_INT (63 - nb
);
10453 return "rld%I2c. %0,%1,%2,%3";
10454 return "rld%I2c %0,%1,%2,%3";
10457 if (nb
< 32 && ne
< 32)
10459 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
10460 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
10461 operands
[3] = GEN_INT (31 - nb
);
10462 operands
[4] = GEN_INT (31 - ne
);
10463 /* This insn can also be a 64-bit rotate with mask that really makes
10464 it just a shift right (with mask); the %h below are to adjust for
10465 that situation (shift count is >= 32 in that case). */
10467 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10468 return "rlw%I2nm %0,%1,%h2,%3,%4";
10471 gcc_unreachable ();
10474 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10475 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10476 ASHIFT, or LSHIFTRT) in mode MODE. */
10479 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
10483 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
10486 int n
= GET_MODE_PRECISION (mode
);
10488 int sh
= INTVAL (XEXP (shift
, 1));
10489 if (sh
< 0 || sh
>= n
)
10492 rtx_code code
= GET_CODE (shift
);
10494 /* Convert any shift by 0 to a rotate, to simplify below code. */
10498 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10499 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
10501 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
10507 /* DImode rotates need rldimi. */
10508 if (mode
== DImode
&& code
== ROTATE
)
10511 /* SImode rotates need rlwimi. */
10512 if (mode
== SImode
&& code
== ROTATE
)
10513 return (nb
< 32 && ne
< 32 && sh
< 32);
10515 /* Wrap-around masks are only okay for rotates. */
10519 /* Don't allow ASHIFT if the mask is wrong for that. */
10520 if (code
== ASHIFT
&& ne
< sh
)
10523 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10524 if the mask is wrong for that. */
10525 if (nb
< 32 && ne
< 32 && sh
< 32
10526 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
10529 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10530 if the mask is wrong for that. */
10531 if (code
== LSHIFTRT
)
10534 return !(code
== LSHIFTRT
&& nb
>= sh
);
10539 /* Return the instruction template for an insert with mask in mode MODE, with
10540 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10543 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
10547 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
10548 gcc_unreachable ();
10550 /* Prefer rldimi because rlwimi is cracked. */
10551 if (TARGET_POWERPC64
10552 && (!dot
|| mode
== DImode
)
10553 && GET_CODE (operands
[4]) != LSHIFTRT
10554 && ne
== INTVAL (operands
[2]))
10556 operands
[3] = GEN_INT (63 - nb
);
10558 return "rldimi. %0,%1,%2,%3";
10559 return "rldimi %0,%1,%2,%3";
10562 if (nb
< 32 && ne
< 32)
10564 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
10565 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
10566 operands
[3] = GEN_INT (31 - nb
);
10567 operands
[4] = GEN_INT (31 - ne
);
10569 return "rlwimi. %0,%1,%2,%3,%4";
10570 return "rlwimi %0,%1,%2,%3,%4";
10573 gcc_unreachable ();
10576 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10577 using two machine instructions. */
10580 rs6000_is_valid_2insn_and (rtx c
, machine_mode mode
)
10582 /* There are two kinds of AND we can handle with two insns:
10583 1) those we can do with two rl* insn;
10586 We do not handle that last case yet. */
10588 /* If there is just one stretch of ones, we can do it. */
10589 if (rs6000_is_valid_mask (c
, NULL
, NULL
, mode
))
10592 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10593 one insn, we can do the whole thing with two. */
10594 unsigned HOST_WIDE_INT val
= INTVAL (c
);
10595 unsigned HOST_WIDE_INT bit1
= val
& -val
;
10596 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
10597 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
10598 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
10599 return rs6000_is_valid_and_mask (GEN_INT (val
+ bit3
- bit2
), mode
);
10602 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10603 If EXPAND is true, split rotate-and-mask instructions we generate to
10604 their constituent parts as well (this is used during expand); if DOT
10605 is 1, make the last insn a record-form instruction clobbering the
10606 destination GPR and setting the CC reg (from operands[3]); if 2, set
10607 that GPR as well as the CC reg. */
10610 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
10612 gcc_assert (!(expand
&& dot
));
10614 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
10616 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10617 shift right. This generates better code than doing the masks without
10618 shifts, or shifting first right and then left. */
10620 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
10622 gcc_assert (mode
== DImode
);
10624 int shift
= 63 - nb
;
10627 rtx tmp1
= gen_reg_rtx (DImode
);
10628 rtx tmp2
= gen_reg_rtx (DImode
);
10629 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
10630 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
10631 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
10635 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
10636 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
10637 emit_move_insn (operands
[0], tmp
);
10638 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
10639 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
10644 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10645 that does the rest. */
10646 unsigned HOST_WIDE_INT bit1
= val
& -val
;
10647 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
10648 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
10649 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
10651 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
10652 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
10654 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
10656 /* Two "no-rotate"-and-mask instructions, for SImode. */
10657 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
10659 gcc_assert (mode
== SImode
);
10661 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
10662 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
10663 emit_move_insn (reg
, tmp
);
10664 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
10665 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
10669 gcc_assert (mode
== DImode
);
10671 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10672 insns; we have to do the first in SImode, because it wraps. */
10673 if (mask2
<= 0xffffffff
10674 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
10676 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
10677 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
10679 rtx reg_low
= gen_lowpart (SImode
, reg
);
10680 emit_move_insn (reg_low
, tmp
);
10681 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
10682 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
10686 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10687 at the top end), rotate back and clear the other hole. */
10688 int right
= exact_log2 (bit3
);
10689 int left
= 64 - right
;
10691 /* Rotate the mask too. */
10692 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
10696 rtx tmp1
= gen_reg_rtx (DImode
);
10697 rtx tmp2
= gen_reg_rtx (DImode
);
10698 rtx tmp3
= gen_reg_rtx (DImode
);
10699 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
10700 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
10701 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
10702 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
10706 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
10707 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
10708 emit_move_insn (operands
[0], tmp
);
10709 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
10710 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
10711 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
10715 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
10716 for lfq and stfq insns iff the registers are hard registers. */
10719 registers_ok_for_quad_peep (rtx reg1
, rtx reg2
)
10721 /* We might have been passed a SUBREG. */
10722 if (!REG_P (reg1
) || !REG_P (reg2
))
10725 /* We might have been passed non floating point registers. */
10726 if (!FP_REGNO_P (REGNO (reg1
))
10727 || !FP_REGNO_P (REGNO (reg2
)))
10730 return (REGNO (reg1
) == REGNO (reg2
) - 1);
10733 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
10734 addr1 and addr2 must be in consecutive memory locations
10735 (addr2 == addr1 + 8). */
10738 mems_ok_for_quad_peep (rtx mem1
, rtx mem2
)
10741 unsigned int reg1
, reg2
;
10742 int offset1
, offset2
;
10744 /* The mems cannot be volatile. */
10745 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
10748 addr1
= XEXP (mem1
, 0);
10749 addr2
= XEXP (mem2
, 0);
10751 /* Extract an offset (if used) from the first addr. */
10752 if (GET_CODE (addr1
) == PLUS
)
10754 /* If not a REG, return zero. */
10755 if (!REG_P (XEXP (addr1
, 0)))
10759 reg1
= REGNO (XEXP (addr1
, 0));
10760 /* The offset must be constant! */
10761 if (!CONST_INT_P (XEXP (addr1
, 1)))
10763 offset1
= INTVAL (XEXP (addr1
, 1));
10766 else if (!REG_P (addr1
))
10770 reg1
= REGNO (addr1
);
10771 /* This was a simple (mem (reg)) expression. Offset is 0. */
10775 /* And now for the second addr. */
10776 if (GET_CODE (addr2
) == PLUS
)
10778 /* If not a REG, return zero. */
10779 if (!REG_P (XEXP (addr2
, 0)))
10783 reg2
= REGNO (XEXP (addr2
, 0));
10784 /* The offset must be constant. */
10785 if (!CONST_INT_P (XEXP (addr2
, 1)))
10787 offset2
= INTVAL (XEXP (addr2
, 1));
10790 else if (!REG_P (addr2
))
10794 reg2
= REGNO (addr2
);
10795 /* This was a simple (mem (reg)) expression. Offset is 0. */
10799 /* Both of these must have the same base register. */
10803 /* The offset for the second addr must be 8 more than the first addr. */
10804 if (offset2
!= offset1
+ 8)
10807 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10812 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
10813 need to use DDmode, in all other cases we can use the same mode. */
10814 static machine_mode
10815 rs6000_secondary_memory_needed_mode (machine_mode mode
)
10817 if (lra_in_progress
&& mode
== SDmode
)
10822 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10823 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10824 only work on the traditional altivec registers, note if an altivec register
10827 static enum rs6000_reg_type
10828 register_to_reg_type (rtx reg
, bool *is_altivec
)
10830 HOST_WIDE_INT regno
;
10831 enum reg_class rclass
;
10833 if (SUBREG_P (reg
))
10834 reg
= SUBREG_REG (reg
);
10837 return NO_REG_TYPE
;
10839 regno
= REGNO (reg
);
10840 if (!HARD_REGISTER_NUM_P (regno
))
10842 if (!lra_in_progress
&& !reload_completed
)
10843 return PSEUDO_REG_TYPE
;
10845 regno
= true_regnum (reg
);
10846 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
))
10847 return PSEUDO_REG_TYPE
;
10850 gcc_assert (regno
>= 0);
10852 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
10853 *is_altivec
= true;
10855 rclass
= rs6000_regno_regclass
[regno
];
10856 return reg_class_to_reg_type
[(int)rclass
];
10859 /* Helper function to return the cost of adding a TOC entry address. */
10862 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
10866 if (TARGET_CMODEL
!= CMODEL_SMALL
)
10867 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
10870 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
10875 /* Helper function for rs6000_secondary_reload to determine whether the memory
10876 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10877 needs reloading. Return negative if the memory is not handled by the memory
10878 helper functions and to try a different reload method, 0 if no additional
10879 instructions are need, and positive to give the extra cost for the
10883 rs6000_secondary_reload_memory (rtx addr
,
10884 enum reg_class rclass
,
10887 int extra_cost
= 0;
10888 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
10889 addr_mask_type addr_mask
;
10890 const char *type
= NULL
;
10891 const char *fail_msg
= NULL
;
10893 if (GPR_REG_CLASS_P (rclass
))
10894 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
10896 else if (rclass
== FLOAT_REGS
)
10897 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
10899 else if (rclass
== ALTIVEC_REGS
)
10900 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
10902 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10903 else if (rclass
== VSX_REGS
)
10904 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
10905 & ~RELOAD_REG_AND_M16
);
10907 /* If the register allocator hasn't made up its mind yet on the register
10908 class to use, settle on defaults to use. */
10909 else if (rclass
== NO_REGS
)
10911 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
10912 & ~RELOAD_REG_AND_M16
);
10914 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
10915 addr_mask
&= ~(RELOAD_REG_INDEXED
10916 | RELOAD_REG_PRE_INCDEC
10917 | RELOAD_REG_PRE_MODIFY
);
10923 /* If the register isn't valid in this register class, just return now. */
10924 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
10926 if (TARGET_DEBUG_ADDR
)
10929 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
10930 "not valid in class\n",
10931 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
10938 switch (GET_CODE (addr
))
10940 /* Does the register class supports auto update forms for this mode? We
10941 don't need a scratch register, since the powerpc only supports
10942 PRE_INC, PRE_DEC, and PRE_MODIFY. */
10945 reg
= XEXP (addr
, 0);
10946 if (!base_reg_operand (addr
, GET_MODE (reg
)))
10948 fail_msg
= "no base register #1";
10952 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
10960 reg
= XEXP (addr
, 0);
10961 plus_arg1
= XEXP (addr
, 1);
10962 if (!base_reg_operand (reg
, GET_MODE (reg
))
10963 || GET_CODE (plus_arg1
) != PLUS
10964 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
10966 fail_msg
= "bad PRE_MODIFY";
10970 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
10977 /* Do we need to simulate AND -16 to clear the bottom address bits used
10978 in VMX load/stores? Only allow the AND for vector sizes. */
10980 and_arg
= XEXP (addr
, 0);
10981 if (GET_MODE_SIZE (mode
) != 16
10982 || !CONST_INT_P (XEXP (addr
, 1))
10983 || INTVAL (XEXP (addr
, 1)) != -16)
10985 fail_msg
= "bad Altivec AND #1";
10989 if (rclass
!= ALTIVEC_REGS
)
10991 if (legitimate_indirect_address_p (and_arg
, false))
10994 else if (legitimate_indexed_address_p (and_arg
, false))
10999 fail_msg
= "bad Altivec AND #2";
11007 /* If this is an indirect address, make sure it is a base register. */
11010 if (!legitimate_indirect_address_p (addr
, false))
11017 /* If this is an indexed address, make sure the register class can handle
11018 indexed addresses for this mode. */
11020 plus_arg0
= XEXP (addr
, 0);
11021 plus_arg1
= XEXP (addr
, 1);
11023 /* (plus (plus (reg) (constant)) (constant)) is generated during
11024 push_reload processing, so handle it now. */
11025 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
11027 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
11034 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11035 push_reload processing, so handle it now. */
11036 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
11038 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
11041 type
= "indexed #2";
11045 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
11047 fail_msg
= "no base register #2";
11051 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
11053 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
11054 || !legitimate_indexed_address_p (addr
, false))
11061 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
11062 && CONST_INT_P (plus_arg1
))
11064 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
11067 type
= "vector d-form offset";
11071 /* Make sure the register class can handle offset addresses. */
11072 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
11074 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
11077 type
= "offset #2";
11083 fail_msg
= "bad PLUS";
11090 /* Quad offsets are restricted and can't handle normal addresses. */
11091 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
11094 type
= "vector d-form lo_sum";
11097 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
11099 fail_msg
= "bad LO_SUM";
11103 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
11110 /* Static addresses need to create a TOC entry. */
11114 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
11117 type
= "vector d-form lo_sum #2";
11123 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
11127 /* TOC references look like offsetable memory. */
11129 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
11131 fail_msg
= "bad UNSPEC";
11135 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
11138 type
= "vector d-form lo_sum #3";
11141 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
11144 type
= "toc reference";
11150 fail_msg
= "bad address";
11155 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
11157 if (extra_cost
< 0)
11159 "rs6000_secondary_reload_memory error: mode = %s, "
11160 "class = %s, addr_mask = '%s', %s\n",
11161 GET_MODE_NAME (mode
),
11162 reg_class_names
[rclass
],
11163 rs6000_debug_addr_mask (addr_mask
, false),
11164 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
11168 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11169 "addr_mask = '%s', extra cost = %d, %s\n",
11170 GET_MODE_NAME (mode
),
11171 reg_class_names
[rclass
],
11172 rs6000_debug_addr_mask (addr_mask
, false),
11174 (type
) ? type
: "<none>");
11182 /* Helper function for rs6000_secondary_reload to return true if a move to a
11183 different register classe is really a simple move. */
11186 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
11187 enum rs6000_reg_type from_type
,
11190 int size
= GET_MODE_SIZE (mode
);
11192 /* Add support for various direct moves available. In this function, we only
11193 look at cases where we don't need any extra registers, and one or more
11194 simple move insns are issued. Originally small integers are not allowed
11195 in FPR/VSX registers. Single precision binary floating is not a simple
11196 move because we need to convert to the single precision memory layout.
11197 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11198 need special direct move handling, which we do not support yet. */
11199 if (TARGET_DIRECT_MOVE
11200 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
11201 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
11203 if (TARGET_POWERPC64
)
11205 /* ISA 2.07: MTVSRD or MVFVSRD. */
11209 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11210 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
11214 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11215 if (TARGET_P8_VECTOR
)
11217 if (mode
== SImode
)
11220 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
11224 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11225 if (mode
== SDmode
)
11229 /* Move to/from SPR. */
11230 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
11231 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
11232 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
11238 /* Direct move helper function for rs6000_secondary_reload, handle all of the
11239 special direct moves that involve allocating an extra register, return the
11240 insn code of the helper function if there is such a function or
11241 CODE_FOR_nothing if not. */
11244 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
11245 enum rs6000_reg_type from_type
,
11247 secondary_reload_info
*sri
,
11251 enum insn_code icode
= CODE_FOR_nothing
;
11253 int size
= GET_MODE_SIZE (mode
);
11255 if (TARGET_POWERPC64
&& size
== 16)
11257 /* Handle moving 128-bit values from GPRs to VSX point registers on
11258 ISA 2.07 (power8, power9) when running in 64-bit mode using
11259 XXPERMDI to glue the two 64-bit values back together. */
11260 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
11262 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
11263 icode
= reg_addr
[mode
].reload_vsx_gpr
;
11266 /* Handle moving 128-bit values from VSX point registers to GPRs on
11267 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11268 bottom 64-bit value. */
11269 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
11271 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
11272 icode
= reg_addr
[mode
].reload_gpr_vsx
;
11276 else if (TARGET_POWERPC64
&& mode
== SFmode
)
11278 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
11280 cost
= 3; /* xscvdpspn, mfvsrd, and. */
11281 icode
= reg_addr
[mode
].reload_gpr_vsx
;
11284 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
11286 cost
= 2; /* mtvsrz, xscvspdpn. */
11287 icode
= reg_addr
[mode
].reload_vsx_gpr
;
11291 else if (!TARGET_POWERPC64
&& size
== 8)
11293 /* Handle moving 64-bit values from GPRs to floating point registers on
11294 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11295 32-bit values back together. Altivec register classes must be handled
11296 specially since a different instruction is used, and the secondary
11297 reload support requires a single instruction class in the scratch
11298 register constraint. However, right now TFmode is not allowed in
11299 Altivec registers, so the pattern will never match. */
11300 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
11302 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
11303 icode
= reg_addr
[mode
].reload_fpr_gpr
;
11307 if (icode
!= CODE_FOR_nothing
)
11312 sri
->icode
= icode
;
11313 sri
->extra_cost
= cost
;
11320 /* Return whether a move between two register classes can be done either
11321 directly (simple move) or via a pattern that uses a single extra temporary
11322 (using ISA 2.07's direct move in this case. */
11325 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
11326 enum rs6000_reg_type from_type
,
11328 secondary_reload_info
*sri
,
11331 /* Fall back to load/store reloads if either type is not a register. */
11332 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
11335 /* If we haven't allocated registers yet, assume the move can be done for the
11336 standard register types. */
11337 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
11338 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
11339 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
11342 /* Moves to the same set of registers is a simple move for non-specialized
11344 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
11347 /* Check whether a simple move can be done directly. */
11348 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
11352 sri
->icode
= CODE_FOR_nothing
;
11353 sri
->extra_cost
= 0;
11358 /* Now check if we can do it in a few steps. */
11359 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
11363 /* Inform reload about cases where moving X with a mode MODE to a register in
11364 RCLASS requires an extra scratch or immediate register. Return the class
11365 needed for the immediate register.
11367 For VSX and Altivec, we may need a register to convert sp+offset into
11370 For misaligned 64-bit gpr loads and stores we need a register to
11371 convert an offset address to indirect. */
11374 rs6000_secondary_reload (bool in_p
,
11376 reg_class_t rclass_i
,
11378 secondary_reload_info
*sri
)
11380 enum reg_class rclass
= (enum reg_class
) rclass_i
;
11381 reg_class_t ret
= ALL_REGS
;
11382 enum insn_code icode
;
11383 bool default_p
= false;
11384 bool done_p
= false;
11386 /* Allow subreg of memory before/during reload. */
11387 bool memory_p
= (MEM_P (x
)
11388 || (!reload_completed
&& SUBREG_P (x
)
11389 && MEM_P (SUBREG_REG (x
))));
11391 sri
->icode
= CODE_FOR_nothing
;
11392 sri
->t_icode
= CODE_FOR_nothing
;
11393 sri
->extra_cost
= 0;
11395 ? reg_addr
[mode
].reload_load
11396 : reg_addr
[mode
].reload_store
);
11398 if (REG_P (x
) || register_operand (x
, mode
))
11400 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
11401 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
11402 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
11405 std::swap (to_type
, from_type
);
11407 /* Can we do a direct move of some sort? */
11408 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
11411 icode
= (enum insn_code
)sri
->icode
;
11418 /* Make sure 0.0 is not reloaded or forced into memory. */
11419 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
11426 /* If this is a scalar floating point value and we want to load it into the
11427 traditional Altivec registers, do it via a move via a traditional floating
11428 point register, unless we have D-form addressing. Also make sure that
11429 non-zero constants use a FPR. */
11430 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
11431 && !mode_supports_vmx_dform (mode
)
11432 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
11433 && (memory_p
|| CONST_DOUBLE_P (x
)))
11440 /* Handle reload of load/stores if we have reload helper functions. */
11441 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
11443 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
11446 if (extra_cost
>= 0)
11450 if (extra_cost
> 0)
11452 sri
->extra_cost
= extra_cost
;
11453 sri
->icode
= icode
;
11458 /* Handle unaligned loads and stores of integer registers. */
11459 if (!done_p
&& TARGET_POWERPC64
11460 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
11462 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
11464 rtx addr
= XEXP (x
, 0);
11465 rtx off
= address_offset (addr
);
11467 if (off
!= NULL_RTX
)
11469 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
11470 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
11472 /* We need a secondary reload when our legitimate_address_p
11473 says the address is good (as otherwise the entire address
11474 will be reloaded), and the offset is not a multiple of
11475 four or we have an address wrap. Address wrap will only
11476 occur for LO_SUMs since legitimate_offset_address_p
11477 rejects addresses for 16-byte mems that will wrap. */
11478 if (GET_CODE (addr
) == LO_SUM
11479 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11480 && ((offset
& 3) != 0
11481 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
11482 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
11483 && (offset
& 3) != 0))
11485 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11487 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
11488 : CODE_FOR_reload_di_load
);
11490 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
11491 : CODE_FOR_reload_di_store
);
11492 sri
->extra_cost
= 2;
11503 if (!done_p
&& !TARGET_POWERPC64
11504 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
11506 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
11508 rtx addr
= XEXP (x
, 0);
11509 rtx off
= address_offset (addr
);
11511 if (off
!= NULL_RTX
)
11513 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
11514 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
11516 /* We need a secondary reload when our legitimate_address_p
11517 says the address is good (as otherwise the entire address
11518 will be reloaded), and we have a wrap.
11520 legitimate_lo_sum_address_p allows LO_SUM addresses to
11521 have any offset so test for wrap in the low 16 bits.
11523 legitimate_offset_address_p checks for the range
11524 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11525 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11526 [0x7ff4,0x7fff] respectively, so test for the
11527 intersection of these ranges, [0x7ffc,0x7fff] and
11528 [0x7ff4,0x7ff7] respectively.
11530 Note that the address we see here may have been
11531 manipulated by legitimize_reload_address. */
11532 if (GET_CODE (addr
) == LO_SUM
11533 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
11534 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
11537 sri
->icode
= CODE_FOR_reload_si_load
;
11539 sri
->icode
= CODE_FOR_reload_si_store
;
11540 sri
->extra_cost
= 2;
11555 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
11557 gcc_assert (ret
!= ALL_REGS
);
11559 if (TARGET_DEBUG_ADDR
)
11562 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11564 reg_class_names
[ret
],
11565 in_p
? "true" : "false",
11566 reg_class_names
[rclass
],
11567 GET_MODE_NAME (mode
));
11569 if (reload_completed
)
11570 fputs (", after reload", stderr
);
11573 fputs (", done_p not set", stderr
);
11576 fputs (", default secondary reload", stderr
);
11578 if (sri
->icode
!= CODE_FOR_nothing
)
11579 fprintf (stderr
, ", reload func = %s, extra cost = %d",
11580 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
11582 else if (sri
->extra_cost
> 0)
11583 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
11585 fputs ("\n", stderr
);
11592 /* Better tracing for rs6000_secondary_reload_inner. */
11595 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
11600 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
11602 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
11603 store_p
? "store" : "load");
11606 set
= gen_rtx_SET (mem
, reg
);
11608 set
= gen_rtx_SET (reg
, mem
);
11610 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
11611 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
11614 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
11615 ATTRIBUTE_NORETURN
;
11618 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
11621 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
11622 gcc_unreachable ();
11625 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11626 reload helper functions. These were identified in
11627 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11628 reload, it calls the insns:
11629 reload_<RELOAD:mode>_<P:mptrsize>_store
11630 reload_<RELOAD:mode>_<P:mptrsize>_load
11632 which in turn calls this function, to do whatever is necessary to create
11633 valid addresses. */
11636 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
11638 int regno
= true_regnum (reg
);
11639 machine_mode mode
= GET_MODE (reg
);
11640 addr_mask_type addr_mask
;
11643 rtx op_reg
, op0
, op1
;
11648 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
) || !MEM_P (mem
)
11649 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
11650 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11652 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
11653 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
11655 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
11656 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
11658 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
11659 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
11662 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11664 /* Make sure the mode is valid in this register class. */
11665 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
11666 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11668 if (TARGET_DEBUG_ADDR
)
11669 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
11671 new_addr
= addr
= XEXP (mem
, 0);
11672 switch (GET_CODE (addr
))
11674 /* Does the register class support auto update forms for this mode? If
11675 not, do the update now. We don't need a scratch register, since the
11676 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
11679 op_reg
= XEXP (addr
, 0);
11680 if (!base_reg_operand (op_reg
, Pmode
))
11681 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11683 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
11685 int delta
= GET_MODE_SIZE (mode
);
11686 if (GET_CODE (addr
) == PRE_DEC
)
11688 emit_insn (gen_add2_insn (op_reg
, GEN_INT (delta
)));
11694 op0
= XEXP (addr
, 0);
11695 op1
= XEXP (addr
, 1);
11696 if (!base_reg_operand (op0
, Pmode
)
11697 || GET_CODE (op1
) != PLUS
11698 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
11699 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11701 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
11703 emit_insn (gen_rtx_SET (op0
, op1
));
11708 /* Do we need to simulate AND -16 to clear the bottom address bits used
11709 in VMX load/stores? */
11711 op0
= XEXP (addr
, 0);
11712 op1
= XEXP (addr
, 1);
11713 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
11715 if (REG_P (op0
) || SUBREG_P (op0
))
11718 else if (GET_CODE (op1
) == PLUS
)
11720 emit_insn (gen_rtx_SET (scratch
, op1
));
11725 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11727 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
11728 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
11729 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
11730 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
11731 new_addr
= scratch
;
11735 /* If this is an indirect address, make sure it is a base register. */
11738 if (!base_reg_operand (addr
, GET_MODE (addr
)))
11740 emit_insn (gen_rtx_SET (scratch
, addr
));
11741 new_addr
= scratch
;
11745 /* If this is an indexed address, make sure the register class can handle
11746 indexed addresses for this mode. */
11748 op0
= XEXP (addr
, 0);
11749 op1
= XEXP (addr
, 1);
11750 if (!base_reg_operand (op0
, Pmode
))
11751 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11753 else if (int_reg_operand (op1
, Pmode
))
11755 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
11757 emit_insn (gen_rtx_SET (scratch
, addr
));
11758 new_addr
= scratch
;
11762 else if (mode_supports_dq_form (mode
) && CONST_INT_P (op1
))
11764 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
11765 || !quad_address_p (addr
, mode
, false))
11767 emit_insn (gen_rtx_SET (scratch
, addr
));
11768 new_addr
= scratch
;
11772 /* Make sure the register class can handle offset addresses. */
11773 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
11775 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
11777 emit_insn (gen_rtx_SET (scratch
, addr
));
11778 new_addr
= scratch
;
11783 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11788 op0
= XEXP (addr
, 0);
11789 op1
= XEXP (addr
, 1);
11790 if (!base_reg_operand (op0
, Pmode
))
11791 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11793 else if (int_reg_operand (op1
, Pmode
))
11795 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
11797 emit_insn (gen_rtx_SET (scratch
, addr
));
11798 new_addr
= scratch
;
11802 /* Quad offsets are restricted and can't handle normal addresses. */
11803 else if (mode_supports_dq_form (mode
))
11805 emit_insn (gen_rtx_SET (scratch
, addr
));
11806 new_addr
= scratch
;
11809 /* Make sure the register class can handle offset addresses. */
11810 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
11812 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
11814 emit_insn (gen_rtx_SET (scratch
, addr
));
11815 new_addr
= scratch
;
11820 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11827 rs6000_emit_move (scratch
, addr
, Pmode
);
11828 new_addr
= scratch
;
11832 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
11835 /* Adjust the address if it changed. */
11836 if (addr
!= new_addr
)
11838 mem
= replace_equiv_address_nv (mem
, new_addr
);
11839 if (TARGET_DEBUG_ADDR
)
11840 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
11843 /* Now create the move. */
11845 emit_insn (gen_rtx_SET (mem
, reg
));
11847 emit_insn (gen_rtx_SET (reg
, mem
));
11852 /* Convert reloads involving 64-bit gprs and misaligned offset
11853 addressing, or multiple 32-bit gprs and offsets that are too large,
11854 to use indirect addressing. */
11857 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
11859 int regno
= true_regnum (reg
);
11860 enum reg_class rclass
;
11862 rtx scratch_or_premodify
= scratch
;
11864 if (TARGET_DEBUG_ADDR
)
11866 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
11867 store_p
? "store" : "load");
11868 fprintf (stderr
, "reg:\n");
11870 fprintf (stderr
, "mem:\n");
11872 fprintf (stderr
, "scratch:\n");
11873 debug_rtx (scratch
);
11876 gcc_assert (regno
>= 0 && HARD_REGISTER_NUM_P (regno
));
11877 gcc_assert (MEM_P (mem
));
11878 rclass
= REGNO_REG_CLASS (regno
);
11879 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
11880 addr
= XEXP (mem
, 0);
11882 if (GET_CODE (addr
) == PRE_MODIFY
)
11884 gcc_assert (REG_P (XEXP (addr
, 0))
11885 && GET_CODE (XEXP (addr
, 1)) == PLUS
11886 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
11887 scratch_or_premodify
= XEXP (addr
, 0);
11888 addr
= XEXP (addr
, 1);
11890 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
11892 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
11894 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
11896 /* Now create the move. */
11898 emit_insn (gen_rtx_SET (mem
, reg
));
11900 emit_insn (gen_rtx_SET (reg
, mem
));
11905 /* Given an rtx X being reloaded into a reg required to be
11906 in class CLASS, return the class of reg to actually use.
11907 In general this is just CLASS; but on some machines
11908 in some cases it is preferable to use a more restrictive class.
11910 On the RS/6000, we have to return NO_REGS when we want to reload a
11911 floating-point CONST_DOUBLE to force it to be copied to memory.
11913 We also don't want to reload integer values into floating-point
11914 registers if we can at all help it. In fact, this can
11915 cause reload to die, if it tries to generate a reload of CTR
11916 into a FP register and discovers it doesn't have the memory location
11919 ??? Would it be a good idea to have reload do the converse, that is
11920 try to reload floating modes into FP registers if possible?
11923 static enum reg_class
11924 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
11926 machine_mode mode
= GET_MODE (x
);
11927 bool is_constant
= CONSTANT_P (x
);
11929 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
11930 reload class for it. */
11931 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
11932 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
11935 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
11936 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
11939 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
11940 the reloading of address expressions using PLUS into floating point
11942 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
11946 /* Zero is always allowed in all VSX registers. */
11947 if (x
== CONST0_RTX (mode
))
11950 /* If this is a vector constant that can be formed with a few Altivec
11951 instructions, we want altivec registers. */
11952 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
11953 return ALTIVEC_REGS
;
11955 /* If this is an integer constant that can easily be loaded into
11956 vector registers, allow it. */
11957 if (CONST_INT_P (x
))
11959 HOST_WIDE_INT value
= INTVAL (x
);
11961 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
11962 2.06 can generate it in the Altivec registers with
11966 if (TARGET_P8_VECTOR
)
11968 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
11969 return ALTIVEC_REGS
;
11974 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
11975 a sign extend in the Altivec registers. */
11976 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
11977 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
11978 return ALTIVEC_REGS
;
11981 /* Force constant to memory. */
11985 /* D-form addressing can easily reload the value. */
11986 if (mode_supports_vmx_dform (mode
)
11987 || mode_supports_dq_form (mode
))
11990 /* If this is a scalar floating point value and we don't have D-form
11991 addressing, prefer the traditional floating point registers so that we
11992 can use D-form (register+offset) addressing. */
11993 if (rclass
== VSX_REGS
11994 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
11997 /* Prefer the Altivec registers if Altivec is handling the vector
11998 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12000 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
12001 || mode
== V1TImode
)
12002 return ALTIVEC_REGS
;
12007 if (is_constant
|| GET_CODE (x
) == PLUS
)
12009 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
12010 return GENERAL_REGS
;
12011 if (reg_class_subset_p (BASE_REGS
, rclass
))
12016 if (GET_MODE_CLASS (mode
) == MODE_INT
&& rclass
== GEN_OR_FLOAT_REGS
)
12017 return GENERAL_REGS
;
12022 /* Debug version of rs6000_preferred_reload_class. */
12023 static enum reg_class
12024 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
12026 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
12029 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12031 reg_class_names
[ret
], reg_class_names
[rclass
],
12032 GET_MODE_NAME (GET_MODE (x
)));
12038 /* If we are copying between FP or AltiVec registers and anything else, we need
12039 a memory location. The exception is when we are targeting ppc64 and the
12040 move to/from fpr to gpr instructions are available. Also, under VSX, you
12041 can copy vector registers from the FP register set to the Altivec register
12042 set and vice versa. */
12045 rs6000_secondary_memory_needed (machine_mode mode
,
12046 reg_class_t from_class
,
12047 reg_class_t to_class
)
12049 enum rs6000_reg_type from_type
, to_type
;
12050 bool altivec_p
= ((from_class
== ALTIVEC_REGS
)
12051 || (to_class
== ALTIVEC_REGS
));
12053 /* If a simple/direct move is available, we don't need secondary memory */
12054 from_type
= reg_class_to_reg_type
[(int)from_class
];
12055 to_type
= reg_class_to_reg_type
[(int)to_class
];
12057 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
,
12058 (secondary_reload_info
*)0, altivec_p
))
12061 /* If we have a floating point or vector register class, we need to use
12062 memory to transfer the data. */
12063 if (IS_FP_VECT_REG_TYPE (from_type
) || IS_FP_VECT_REG_TYPE (to_type
))
12069 /* Debug version of rs6000_secondary_memory_needed. */
12071 rs6000_debug_secondary_memory_needed (machine_mode mode
,
12072 reg_class_t from_class
,
12073 reg_class_t to_class
)
12075 bool ret
= rs6000_secondary_memory_needed (mode
, from_class
, to_class
);
12078 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12079 "to_class = %s, mode = %s\n",
12080 ret
? "true" : "false",
12081 reg_class_names
[from_class
],
12082 reg_class_names
[to_class
],
12083 GET_MODE_NAME (mode
));
12088 /* Return the register class of a scratch register needed to copy IN into
12089 or out of a register in RCLASS in MODE. If it can be done directly,
12090 NO_REGS is returned. */
12092 static enum reg_class
12093 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
12098 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
12100 && MACHOPIC_INDIRECT
12104 /* We cannot copy a symbolic operand directly into anything
12105 other than BASE_REGS for TARGET_ELF. So indicate that a
12106 register from BASE_REGS is needed as an intermediate
12109 On Darwin, pic addresses require a load from memory, which
12110 needs a base register. */
12111 if (rclass
!= BASE_REGS
12112 && (SYMBOL_REF_P (in
)
12113 || GET_CODE (in
) == HIGH
12114 || GET_CODE (in
) == LABEL_REF
12115 || GET_CODE (in
) == CONST
))
12121 regno
= REGNO (in
);
12122 if (!HARD_REGISTER_NUM_P (regno
))
12124 regno
= true_regnum (in
);
12125 if (!HARD_REGISTER_NUM_P (regno
))
12129 else if (SUBREG_P (in
))
12131 regno
= true_regnum (in
);
12132 if (!HARD_REGISTER_NUM_P (regno
))
12138 /* If we have VSX register moves, prefer moving scalar values between
12139 Altivec registers and GPR by going via an FPR (and then via memory)
12140 instead of reloading the secondary memory address for Altivec moves. */
12142 && GET_MODE_SIZE (mode
) < 16
12143 && !mode_supports_vmx_dform (mode
)
12144 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
12145 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
12146 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
12147 && (regno
>= 0 && INT_REGNO_P (regno
)))))
12150 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12152 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
12153 || (regno
>= 0 && INT_REGNO_P (regno
)))
12156 /* Constants, memory, and VSX registers can go into VSX registers (both the
12157 traditional floating point and the altivec registers). */
12158 if (rclass
== VSX_REGS
12159 && (regno
== -1 || VSX_REGNO_P (regno
)))
12162 /* Constants, memory, and FP registers can go into FP registers. */
12163 if ((regno
== -1 || FP_REGNO_P (regno
))
12164 && (rclass
== FLOAT_REGS
|| rclass
== GEN_OR_FLOAT_REGS
))
12165 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
12167 /* Memory, and AltiVec registers can go into AltiVec registers. */
12168 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
12169 && rclass
== ALTIVEC_REGS
)
12172 /* We can copy among the CR registers. */
12173 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
12174 && regno
>= 0 && CR_REGNO_P (regno
))
12177 /* Otherwise, we need GENERAL_REGS. */
12178 return GENERAL_REGS
;
12181 /* Debug version of rs6000_secondary_reload_class. */
12182 static enum reg_class
12183 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
12184 machine_mode mode
, rtx in
)
12186 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
12188 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12189 "mode = %s, input rtx:\n",
12190 reg_class_names
[ret
], reg_class_names
[rclass
],
12191 GET_MODE_NAME (mode
));
12197 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12200 rs6000_can_change_mode_class (machine_mode from
,
12202 reg_class_t rclass
)
12204 unsigned from_size
= GET_MODE_SIZE (from
);
12205 unsigned to_size
= GET_MODE_SIZE (to
);
12207 if (from_size
!= to_size
)
12209 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
12211 if (reg_classes_intersect_p (xclass
, rclass
))
12213 unsigned to_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, to
);
12214 unsigned from_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, from
);
12215 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
12216 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
12218 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12219 single register under VSX because the scalar part of the register
12220 is in the upper 64-bits, and not the lower 64-bits. Types like
12221 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
12222 IEEE floating point can't overlap, and neither can small
12225 if (to_float128_vector_p
&& from_float128_vector_p
)
12228 else if (to_float128_vector_p
|| from_float128_vector_p
)
12231 /* TDmode in floating-mode registers must always go into a register
12232 pair with the most significant word in the even-numbered register
12233 to match ISA requirements. In little-endian mode, this does not
12234 match subreg numbering, so we cannot allow subregs. */
12235 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
12238 if (from_size
< 8 || to_size
< 8)
12241 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
12244 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
12253 /* Since the VSX register set includes traditional floating point registers
12254 and altivec registers, just check for the size being different instead of
12255 trying to check whether the modes are vector modes. Otherwise it won't
12256 allow say DF and DI to change classes. For types like TFmode and TDmode
12257 that take 2 64-bit registers, rather than a single 128-bit register, don't
12258 allow subregs of those types to other 128 bit types. */
12259 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
12261 unsigned num_regs
= (from_size
+ 15) / 16;
12262 if (hard_regno_nregs (FIRST_FPR_REGNO
, to
) > num_regs
12263 || hard_regno_nregs (FIRST_FPR_REGNO
, from
) > num_regs
)
12266 return (from_size
== 8 || from_size
== 16);
12269 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
12270 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
12276 /* Debug version of rs6000_can_change_mode_class. */
12278 rs6000_debug_can_change_mode_class (machine_mode from
,
12280 reg_class_t rclass
)
12282 bool ret
= rs6000_can_change_mode_class (from
, to
, rclass
);
12285 "rs6000_can_change_mode_class, return %s, from = %s, "
12286 "to = %s, rclass = %s\n",
12287 ret
? "true" : "false",
12288 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
12289 reg_class_names
[rclass
]);
12294 /* Return a string to do a move operation of 128 bits of data. */
12297 rs6000_output_move_128bit (rtx operands
[])
12299 rtx dest
= operands
[0];
12300 rtx src
= operands
[1];
12301 machine_mode mode
= GET_MODE (dest
);
12304 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
12305 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
12309 dest_regno
= REGNO (dest
);
12310 dest_gpr_p
= INT_REGNO_P (dest_regno
);
12311 dest_fp_p
= FP_REGNO_P (dest_regno
);
12312 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
12313 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
12318 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
12323 src_regno
= REGNO (src
);
12324 src_gpr_p
= INT_REGNO_P (src_regno
);
12325 src_fp_p
= FP_REGNO_P (src_regno
);
12326 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
12327 src_vsx_p
= src_fp_p
| src_vmx_p
;
12332 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
12335 /* Register moves. */
12336 if (dest_regno
>= 0 && src_regno
>= 0)
12343 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
12344 return (WORDS_BIG_ENDIAN
12345 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12346 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12348 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
12352 else if (TARGET_VSX
&& dest_vsx_p
)
12355 return "xxlor %x0,%x1,%x1";
12357 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
12358 return (WORDS_BIG_ENDIAN
12359 ? "mtvsrdd %x0,%1,%L1"
12360 : "mtvsrdd %x0,%L1,%1");
12362 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
12366 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
12367 return "vor %0,%1,%1";
12369 else if (dest_fp_p
&& src_fp_p
)
12374 else if (dest_regno
>= 0 && MEM_P (src
))
12378 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
12384 else if (TARGET_ALTIVEC
&& dest_vmx_p
12385 && altivec_indexed_or_indirect_operand (src
, mode
))
12386 return "lvx %0,%y1";
12388 else if (TARGET_VSX
&& dest_vsx_p
)
12390 if (mode_supports_dq_form (mode
)
12391 && quad_address_p (XEXP (src
, 0), mode
, true))
12392 return "lxv %x0,%1";
12394 else if (TARGET_P9_VECTOR
)
12395 return "lxvx %x0,%y1";
12397 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
12398 return "lxvw4x %x0,%y1";
12401 return "lxvd2x %x0,%y1";
12404 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
12405 return "lvx %0,%y1";
12407 else if (dest_fp_p
)
12412 else if (src_regno
>= 0 && MEM_P (dest
))
12416 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
12417 return "stq %1,%0";
12422 else if (TARGET_ALTIVEC
&& src_vmx_p
12423 && altivec_indexed_or_indirect_operand (dest
, mode
))
12424 return "stvx %1,%y0";
12426 else if (TARGET_VSX
&& src_vsx_p
)
12428 if (mode_supports_dq_form (mode
)
12429 && quad_address_p (XEXP (dest
, 0), mode
, true))
12430 return "stxv %x1,%0";
12432 else if (TARGET_P9_VECTOR
)
12433 return "stxvx %x1,%y0";
12435 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
12436 return "stxvw4x %x1,%y0";
12439 return "stxvd2x %x1,%y0";
12442 else if (TARGET_ALTIVEC
&& src_vmx_p
)
12443 return "stvx %1,%y0";
12450 else if (dest_regno
>= 0
12451 && (CONST_INT_P (src
)
12452 || CONST_WIDE_INT_P (src
)
12453 || CONST_DOUBLE_P (src
)
12454 || GET_CODE (src
) == CONST_VECTOR
))
12459 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
12460 || (dest_vsx_p
&& TARGET_VSX
))
12461 return output_vec_const_move (operands
);
12464 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
12467 /* Validate a 128-bit move. */
12469 rs6000_move_128bit_ok_p (rtx operands
[])
12471 machine_mode mode
= GET_MODE (operands
[0]);
12472 return (gpc_reg_operand (operands
[0], mode
)
12473 || gpc_reg_operand (operands
[1], mode
));
12476 /* Return true if a 128-bit move needs to be split. */
12478 rs6000_split_128bit_ok_p (rtx operands
[])
12480 if (!reload_completed
)
12483 if (!gpr_or_gpr_p (operands
[0], operands
[1]))
12486 if (quad_load_store_p (operands
[0], operands
[1]))
12493 /* Given a comparison operation, return the bit number in CCR to test. We
12494 know this is a valid comparison.
12496 SCC_P is 1 if this is for an scc. That means that %D will have been
12497 used instead of %C, so the bits will be in different places.
12499 Return -1 if OP isn't a valid comparison for some reason. */
12502 ccr_bit (rtx op
, int scc_p
)
12504 enum rtx_code code
= GET_CODE (op
);
12505 machine_mode cc_mode
;
12510 if (!COMPARISON_P (op
))
12513 reg
= XEXP (op
, 0);
12515 if (!REG_P (reg
) || !CR_REGNO_P (REGNO (reg
)))
12518 cc_mode
= GET_MODE (reg
);
12519 cc_regnum
= REGNO (reg
);
12520 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
12522 validate_condition_mode (code
, cc_mode
);
12524 /* When generating a sCOND operation, only positive conditions are
12543 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
12545 return base_bit
+ 2;
12546 case GT
: case GTU
: case UNLE
:
12547 return base_bit
+ 1;
12548 case LT
: case LTU
: case UNGE
:
12550 case ORDERED
: case UNORDERED
:
12551 return base_bit
+ 3;
12554 /* If scc, we will have done a cror to put the bit in the
12555 unordered position. So test that bit. For integer, this is ! LT
12556 unless this is an scc insn. */
12557 return scc_p
? base_bit
+ 3 : base_bit
;
12560 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
12567 /* Return the GOT register. */
12570 rs6000_got_register (rtx value ATTRIBUTE_UNUSED
)
12572 /* The second flow pass currently (June 1999) can't update
12573 regs_ever_live without disturbing other parts of the compiler, so
12574 update it here to make the prolog/epilogue code happy. */
12575 if (!can_create_pseudo_p ()
12576 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
12577 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM
, true);
12579 crtl
->uses_pic_offset_table
= 1;
12581 return pic_offset_table_rtx
;
12584 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12586 /* Write out a function code label. */
12589 rs6000_output_function_entry (FILE *file
, const char *fname
)
12591 if (fname
[0] != '.')
12593 switch (DEFAULT_ABI
)
12596 gcc_unreachable ();
12602 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
12612 RS6000_OUTPUT_BASENAME (file
, fname
);
12615 /* Print an operand. Recognize special options, documented below. */
12618 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12619 only introduced by the linker, when applying the sda21
12621 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12622 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12624 #define SMALL_DATA_RELOC "sda21"
12625 #define SMALL_DATA_REG 0
12629 print_operand (FILE *file
, rtx x
, int code
)
12632 unsigned HOST_WIDE_INT uval
;
12636 /* %a is output_address. */
12638 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12642 /* Like 'J' but get to the GT bit only. */
12643 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
12645 output_operand_lossage ("invalid %%D value");
12649 /* Bit 1 is GT bit. */
12650 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
12652 /* Add one for shift count in rlinm for scc. */
12653 fprintf (file
, "%d", i
+ 1);
12657 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12660 output_operand_lossage ("invalid %%e value");
12665 if ((uval
& 0xffff) == 0 && uval
!= 0)
12670 /* X is a CR register. Print the number of the EQ bit of the CR */
12671 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
12672 output_operand_lossage ("invalid %%E value");
12674 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
12678 /* X is a CR register. Print the shift count needed to move it
12679 to the high-order four bits. */
12680 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
12681 output_operand_lossage ("invalid %%f value");
12683 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
12687 /* Similar, but print the count for the rotate in the opposite
12689 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
12690 output_operand_lossage ("invalid %%F value");
12692 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
12696 /* X is a constant integer. If it is negative, print "m",
12697 otherwise print "z". This is to make an aze or ame insn. */
12698 if (!CONST_INT_P (x
))
12699 output_operand_lossage ("invalid %%G value");
12700 else if (INTVAL (x
) >= 0)
12707 /* If constant, output low-order five bits. Otherwise, write
12710 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
12712 print_operand (file
, x
, 0);
12716 /* If constant, output low-order six bits. Otherwise, write
12719 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
12721 print_operand (file
, x
, 0);
12725 /* Print `i' if this is a constant, else nothing. */
12731 /* Write the bit number in CCR for jump. */
12732 i
= ccr_bit (x
, 0);
12734 output_operand_lossage ("invalid %%j code");
12736 fprintf (file
, "%d", i
);
12740 /* Similar, but add one for shift count in rlinm for scc and pass
12741 scc flag to `ccr_bit'. */
12742 i
= ccr_bit (x
, 1);
12744 output_operand_lossage ("invalid %%J code");
12746 /* If we want bit 31, write a shift count of zero, not 32. */
12747 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
12751 /* X must be a constant. Write the 1's complement of the
12754 output_operand_lossage ("invalid %%k value");
12756 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
12760 /* X must be a symbolic constant on ELF. Write an
12761 expression suitable for an 'addi' that adds in the low 16
12762 bits of the MEM. */
12763 if (GET_CODE (x
) == CONST
)
12765 if (GET_CODE (XEXP (x
, 0)) != PLUS
12766 || (!SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
12767 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
12768 || !CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
12769 output_operand_lossage ("invalid %%K value");
12771 print_operand_address (file
, x
);
12772 fputs ("@l", file
);
12775 /* %l is output_asm_label. */
12778 /* Write second word of DImode or DFmode reference. Works on register
12779 or non-indexed memory only. */
12781 fputs (reg_names
[REGNO (x
) + 1], file
);
12782 else if (MEM_P (x
))
12784 machine_mode mode
= GET_MODE (x
);
12785 /* Handle possible auto-increment. Since it is pre-increment and
12786 we have already done it, we can just use an offset of word. */
12787 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
12788 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
12789 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
12791 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
12792 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
12795 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
12799 if (small_data_operand (x
, GET_MODE (x
)))
12800 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
12801 reg_names
[SMALL_DATA_REG
]);
12805 case 'N': /* Unused */
12806 /* Write the number of elements in the vector times 4. */
12807 if (GET_CODE (x
) != PARALLEL
)
12808 output_operand_lossage ("invalid %%N value");
12810 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
12813 case 'O': /* Unused */
12814 /* Similar, but subtract 1 first. */
12815 if (GET_CODE (x
) != PARALLEL
)
12816 output_operand_lossage ("invalid %%O value");
12818 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
12822 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12825 || (i
= exact_log2 (INTVAL (x
))) < 0)
12826 output_operand_lossage ("invalid %%p value");
12828 fprintf (file
, "%d", i
);
12832 /* The operand must be an indirect memory reference. The result
12833 is the register name. */
12834 if (!MEM_P (x
) || !REG_P (XEXP (x
, 0))
12835 || REGNO (XEXP (x
, 0)) >= 32)
12836 output_operand_lossage ("invalid %%P value");
12838 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
12842 /* This outputs the logical code corresponding to a boolean
12843 expression. The expression may have one or both operands
12844 negated (if one, only the first one). For condition register
12845 logical operations, it will also treat the negated
12846 CR codes as NOTs, but not handle NOTs of them. */
12848 const char *const *t
= 0;
12850 enum rtx_code code
= GET_CODE (x
);
12851 static const char * const tbl
[3][3] = {
12852 { "and", "andc", "nor" },
12853 { "or", "orc", "nand" },
12854 { "xor", "eqv", "xor" } };
12858 else if (code
== IOR
)
12860 else if (code
== XOR
)
12863 output_operand_lossage ("invalid %%q value");
12865 if (GET_CODE (XEXP (x
, 0)) != NOT
)
12869 if (GET_CODE (XEXP (x
, 1)) == NOT
)
12880 if (! TARGET_MFCRF
)
12886 /* X is a CR register. Print the mask for `mtcrf'. */
12887 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
12888 output_operand_lossage ("invalid %%R value");
12890 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
12894 /* Low 5 bits of 32 - value */
12896 output_operand_lossage ("invalid %%s value");
12898 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
12902 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
12903 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
12905 output_operand_lossage ("invalid %%t value");
12909 /* Bit 3 is OV bit. */
12910 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
12912 /* If we want bit 31, write a shift count of zero, not 32. */
12913 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
12917 /* Print the symbolic name of a branch target register. */
12918 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
12919 x
= XVECEXP (x
, 0, 0);
12920 if (!REG_P (x
) || (REGNO (x
) != LR_REGNO
12921 && REGNO (x
) != CTR_REGNO
))
12922 output_operand_lossage ("invalid %%T value");
12923 else if (REGNO (x
) == LR_REGNO
)
12924 fputs ("lr", file
);
12926 fputs ("ctr", file
);
12930 /* High-order or low-order 16 bits of constant, whichever is non-zero,
12931 for use in unsigned operand. */
12934 output_operand_lossage ("invalid %%u value");
12939 if ((uval
& 0xffff) == 0)
12942 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
12946 /* High-order 16 bits of constant for use in signed operand. */
12948 output_operand_lossage ("invalid %%v value");
12950 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
12951 (INTVAL (x
) >> 16) & 0xffff);
12955 /* Print `u' if this has an auto-increment or auto-decrement. */
12957 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
12958 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
12959 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
12964 /* Print the trap code for this operand. */
12965 switch (GET_CODE (x
))
12968 fputs ("eq", file
); /* 4 */
12971 fputs ("ne", file
); /* 24 */
12974 fputs ("lt", file
); /* 16 */
12977 fputs ("le", file
); /* 20 */
12980 fputs ("gt", file
); /* 8 */
12983 fputs ("ge", file
); /* 12 */
12986 fputs ("llt", file
); /* 2 */
12989 fputs ("lle", file
); /* 6 */
12992 fputs ("lgt", file
); /* 1 */
12995 fputs ("lge", file
); /* 5 */
12998 output_operand_lossage ("invalid %%V value");
13003 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13006 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
13007 ((INTVAL (x
) & 0xffff) ^ 0x8000) - 0x8000);
13009 print_operand (file
, x
, 0);
13013 /* X is a FPR or Altivec register used in a VSX context. */
13014 if (!REG_P (x
) || !VSX_REGNO_P (REGNO (x
)))
13015 output_operand_lossage ("invalid %%x value");
13018 int reg
= REGNO (x
);
13019 int vsx_reg
= (FP_REGNO_P (reg
)
13021 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
13023 #ifdef TARGET_REGNAMES
13024 if (TARGET_REGNAMES
)
13025 fprintf (file
, "%%vs%d", vsx_reg
);
13028 fprintf (file
, "%d", vsx_reg
);
13034 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
13035 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
13036 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
13041 /* Like 'L', for third word of TImode/PTImode */
13043 fputs (reg_names
[REGNO (x
) + 2], file
);
13044 else if (MEM_P (x
))
13046 machine_mode mode
= GET_MODE (x
);
13047 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
13048 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
13049 output_address (mode
, plus_constant (Pmode
,
13050 XEXP (XEXP (x
, 0), 0), 8));
13051 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
13052 output_address (mode
, plus_constant (Pmode
,
13053 XEXP (XEXP (x
, 0), 0), 8));
13055 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
13056 if (small_data_operand (x
, GET_MODE (x
)))
13057 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
13058 reg_names
[SMALL_DATA_REG
]);
13063 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
13064 x
= XVECEXP (x
, 0, 1);
13065 /* X is a SYMBOL_REF. Write out the name preceded by a
13066 period and without any trailing data in brackets. Used for function
13067 names. If we are configured for System V (or the embedded ABI) on
13068 the PowerPC, do not emit the period, since those systems do not use
13069 TOCs and the like. */
13070 if (!SYMBOL_REF_P (x
))
13072 output_operand_lossage ("invalid %%z value");
13076 /* For macho, check to see if we need a stub. */
13079 const char *name
= XSTR (x
, 0);
13081 if (darwin_symbol_stubs
13082 && MACHOPIC_INDIRECT
13083 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13084 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13086 assemble_name (file
, name
);
13088 else if (!DOT_SYMBOLS
)
13089 assemble_name (file
, XSTR (x
, 0));
13091 rs6000_output_function_entry (file
, XSTR (x
, 0));
13095 /* Like 'L', for last word of TImode/PTImode. */
13097 fputs (reg_names
[REGNO (x
) + 3], file
);
13098 else if (MEM_P (x
))
13100 machine_mode mode
= GET_MODE (x
);
13101 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
13102 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
13103 output_address (mode
, plus_constant (Pmode
,
13104 XEXP (XEXP (x
, 0), 0), 12));
13105 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
13106 output_address (mode
, plus_constant (Pmode
,
13107 XEXP (XEXP (x
, 0), 0), 12));
13109 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
13110 if (small_data_operand (x
, GET_MODE (x
)))
13111 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
13112 reg_names
[SMALL_DATA_REG
]);
13116 /* Print AltiVec memory operand. */
13121 gcc_assert (MEM_P (x
));
13125 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x
))
13126 && GET_CODE (tmp
) == AND
13127 && CONST_INT_P (XEXP (tmp
, 1))
13128 && INTVAL (XEXP (tmp
, 1)) == -16)
13129 tmp
= XEXP (tmp
, 0);
13130 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
13131 && GET_CODE (tmp
) == PRE_MODIFY
)
13132 tmp
= XEXP (tmp
, 1);
13134 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
13137 if (GET_CODE (tmp
) != PLUS
13138 || !REG_P (XEXP (tmp
, 0))
13139 || !REG_P (XEXP (tmp
, 1)))
13141 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13145 if (REGNO (XEXP (tmp
, 0)) == 0)
13146 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
13147 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
13149 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
13150 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
13157 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
13158 else if (MEM_P (x
))
13160 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13161 know the width from the mode. */
13162 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
13163 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
13164 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
13165 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
13166 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
13167 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
13168 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
13169 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
13171 output_address (GET_MODE (x
), XEXP (x
, 0));
13173 else if (toc_relative_expr_p (x
, false,
13174 &tocrel_base_oac
, &tocrel_offset_oac
))
13175 /* This hack along with a corresponding hack in
13176 rs6000_output_addr_const_extra arranges to output addends
13177 where the assembler expects to find them. eg.
13178 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13179 without this hack would be output as "x@toc+4". We
13181 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
13182 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
13183 output_addr_const (file
, XVECEXP (x
, 0, 0));
13184 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
13185 output_addr_const (file
, XVECEXP (x
, 0, 1));
13187 output_addr_const (file
, x
);
13191 if (const char *name
= get_some_local_dynamic_name ())
13192 assemble_name (file
, name
);
13194 output_operand_lossage ("'%%&' used without any "
13195 "local dynamic TLS references");
13199 output_operand_lossage ("invalid %%xn code");
13203 /* Print the address of an operand. */
13206 print_operand_address (FILE *file
, rtx x
)
13209 fprintf (file
, "0(%s)", reg_names
[ REGNO (x
) ]);
13211 /* Is it a PC-relative address? */
13212 else if (TARGET_PCREL
&& pcrel_local_or_external_address (x
, VOIDmode
))
13214 HOST_WIDE_INT offset
;
13216 if (GET_CODE (x
) == CONST
)
13219 if (GET_CODE (x
) == PLUS
)
13221 offset
= INTVAL (XEXP (x
, 1));
13227 output_addr_const (file
, x
);
13230 fprintf (file
, "%+" PRId64
, offset
);
13232 if (SYMBOL_REF_P (x
) && !SYMBOL_REF_LOCAL_P (x
))
13233 fprintf (file
, "@got");
13235 fprintf (file
, "@pcrel");
13237 else if (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
13238 || GET_CODE (x
) == LABEL_REF
)
13240 output_addr_const (file
, x
);
13241 if (small_data_operand (x
, GET_MODE (x
)))
13242 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
13243 reg_names
[SMALL_DATA_REG
]);
13245 gcc_assert (!TARGET_TOC
);
13247 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
13248 && REG_P (XEXP (x
, 1)))
13250 if (REGNO (XEXP (x
, 0)) == 0)
13251 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 1)) ],
13252 reg_names
[ REGNO (XEXP (x
, 0)) ]);
13254 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 0)) ],
13255 reg_names
[ REGNO (XEXP (x
, 1)) ]);
13257 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
13258 && CONST_INT_P (XEXP (x
, 1)))
13259 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
13260 INTVAL (XEXP (x
, 1)), reg_names
[ REGNO (XEXP (x
, 0)) ]);
13262 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
13263 && CONSTANT_P (XEXP (x
, 1)))
13265 fprintf (file
, "lo16(");
13266 output_addr_const (file
, XEXP (x
, 1));
13267 fprintf (file
, ")(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
13271 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
13272 && CONSTANT_P (XEXP (x
, 1)))
13274 output_addr_const (file
, XEXP (x
, 1));
13275 fprintf (file
, "@l(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
13278 else if (toc_relative_expr_p (x
, false, &tocrel_base_oac
, &tocrel_offset_oac
))
13280 /* This hack along with a corresponding hack in
13281 rs6000_output_addr_const_extra arranges to output addends
13282 where the assembler expects to find them. eg.
13284 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13285 without this hack would be output as "x@toc+8@l(9)". We
13286 want "x+8@toc@l(9)". */
13287 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
13288 if (GET_CODE (x
) == LO_SUM
)
13289 fprintf (file
, "@l(%s)", reg_names
[REGNO (XEXP (x
, 0))]);
13291 fprintf (file
, "(%s)", reg_names
[REGNO (XVECEXP (tocrel_base_oac
, 0, 1))]);
13294 output_addr_const (file
, x
);
13297 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13300 rs6000_output_addr_const_extra (FILE *file
, rtx x
)
13302 if (GET_CODE (x
) == UNSPEC
)
13303 switch (XINT (x
, 1))
13305 case UNSPEC_TOCREL
:
13306 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x
, 0, 0))
13307 && REG_P (XVECEXP (x
, 0, 1))
13308 && REGNO (XVECEXP (x
, 0, 1)) == TOC_REGISTER
);
13309 output_addr_const (file
, XVECEXP (x
, 0, 0));
13310 if (x
== tocrel_base_oac
&& tocrel_offset_oac
!= const0_rtx
)
13312 if (INTVAL (tocrel_offset_oac
) >= 0)
13313 fprintf (file
, "+");
13314 output_addr_const (file
, CONST_CAST_RTX (tocrel_offset_oac
));
13316 if (!TARGET_AIX
|| (TARGET_ELF
&& TARGET_MINIMAL_TOC
))
13319 assemble_name (file
, toc_label_name
);
13322 else if (TARGET_ELF
)
13323 fputs ("@toc", file
);
13327 case UNSPEC_MACHOPIC_OFFSET
:
13328 output_addr_const (file
, XVECEXP (x
, 0, 0));
13330 machopic_output_function_base_name (file
);
13337 /* Target hook for assembling integer objects. The PowerPC version has
13338 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13339 is defined. It also needs to handle DI-mode objects on 64-bit
13343 rs6000_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
13345 #ifdef RELOCATABLE_NEEDS_FIXUP
13346 /* Special handling for SI values. */
13347 if (RELOCATABLE_NEEDS_FIXUP
&& size
== 4 && aligned_p
)
13349 static int recurse
= 0;
13351 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13352 the .fixup section. Since the TOC section is already relocated, we
13353 don't need to mark it here. We used to skip the text section, but it
13354 should never be valid for relocated addresses to be placed in the text
13356 if (DEFAULT_ABI
== ABI_V4
13357 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
13358 && in_section
!= toc_section
13360 && !CONST_SCALAR_INT_P (x
)
13366 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCP", fixuplabelno
);
13368 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
13369 fprintf (asm_out_file
, "\t.long\t(");
13370 output_addr_const (asm_out_file
, x
);
13371 fprintf (asm_out_file
, ")@fixup\n");
13372 fprintf (asm_out_file
, "\t.section\t\".fixup\",\"aw\"\n");
13373 ASM_OUTPUT_ALIGN (asm_out_file
, 2);
13374 fprintf (asm_out_file
, "\t.long\t");
13375 assemble_name (asm_out_file
, buf
);
13376 fprintf (asm_out_file
, "\n\t.previous\n");
13380 /* Remove initial .'s to turn a -mcall-aixdesc function
13381 address into the address of the descriptor, not the function
13383 else if (SYMBOL_REF_P (x
)
13384 && XSTR (x
, 0)[0] == '.'
13385 && DEFAULT_ABI
== ABI_AIX
)
13387 const char *name
= XSTR (x
, 0);
13388 while (*name
== '.')
13391 fprintf (asm_out_file
, "\t.long\t%s\n", name
);
13395 #endif /* RELOCATABLE_NEEDS_FIXUP */
13396 return default_assemble_integer (x
, size
, aligned_p
);
13399 /* Return a template string for assembly to emit when making an
13400 external call. FUNOP is the call mem argument operand number. */
13402 static const char *
13403 rs6000_call_template_1 (rtx
*operands
, unsigned int funop
, bool sibcall
)
13405 /* -Wformat-overflow workaround, without which gcc thinks that %u
13406 might produce 10 digits. */
13407 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
13411 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
13413 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
13414 sprintf (arg
, "(%%%u@tlsgd)", funop
+ 1);
13415 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
13416 sprintf (arg
, "(%%&@tlsld)");
13419 /* The magic 32768 offset here corresponds to the offset of
13420 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13422 sprintf (z
, "%%z%u%s", funop
,
13423 (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
== 2
13426 static char str
[32]; /* 1 spare */
13427 if (rs6000_pcrel_p (cfun
))
13428 sprintf (str
, "b%s %s@notoc%s", sibcall
? "" : "l", z
, arg
);
13429 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
13430 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
13431 sibcall
? "" : "\n\tnop");
13432 else if (DEFAULT_ABI
== ABI_V4
)
13433 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
13434 flag_pic
? "@plt" : "");
13436 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
13437 else if (DEFAULT_ABI
== ABI_DARWIN
)
13439 /* The cookie is in operand func+2. */
13440 gcc_checking_assert (GET_CODE (operands
[funop
+ 2]) == CONST_INT
);
13441 int cookie
= INTVAL (operands
[funop
+ 2]);
13442 if (cookie
& CALL_LONG
)
13444 tree funname
= get_identifier (XSTR (operands
[funop
], 0));
13445 tree labelname
= get_prev_label (funname
);
13446 gcc_checking_assert (labelname
&& !sibcall
);
13448 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13449 instruction will reach 'foo', otherwise link as 'bl L42'".
13450 "L42" should be a 'branch island', that will do a far jump to
13451 'foo'. Branch islands are generated in
13452 macho_branch_islands(). */
13453 sprintf (str
, "jbsr %%z%u,%.10s", funop
,
13454 IDENTIFIER_POINTER (labelname
));
13457 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13459 sprintf (str
, "b%s %s%s", sibcall
? "" : "l", z
, arg
);
13463 gcc_unreachable ();
13468 rs6000_call_template (rtx
*operands
, unsigned int funop
)
13470 return rs6000_call_template_1 (operands
, funop
, false);
13474 rs6000_sibcall_template (rtx
*operands
, unsigned int funop
)
13476 return rs6000_call_template_1 (operands
, funop
, true);
13479 /* As above, for indirect calls. */
13481 static const char *
13482 rs6000_indirect_call_template_1 (rtx
*operands
, unsigned int funop
,
13485 /* -Wformat-overflow workaround, without which gcc thinks that %u
13486 might produce 10 digits. Note that -Wformat-overflow will not
13487 currently warn here for str[], so do not rely on a warning to
13488 ensure str[] is correctly sized. */
13489 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
13491 /* Currently, funop is either 0 or 1. The maximum string is always
13492 a !speculate 64-bit __tls_get_addr call.
13495 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13496 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13498 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13499 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13506 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13507 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13509 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13510 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13517 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13518 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13520 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13521 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13528 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13529 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13531 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13532 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13536 static char str
[160]; /* 8 spare */
13538 const char *ptrload
= TARGET_64BIT
? "d" : "wz";
13540 if (DEFAULT_ABI
== ABI_AIX
)
13543 ptrload
, funop
+ 2);
13545 /* We don't need the extra code to stop indirect call speculation if
13547 bool speculate
= (TARGET_MACHO
13548 || rs6000_speculate_indirect_jumps
13549 || (REG_P (operands
[funop
])
13550 && REGNO (operands
[funop
]) == LR_REGNO
));
13552 if (TARGET_PLTSEQ
&& GET_CODE (operands
[funop
]) == UNSPEC
)
13554 const char *rel64
= TARGET_64BIT
? "64" : "";
13557 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
13559 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
13560 sprintf (tls
, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13562 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
13563 sprintf (tls
, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13567 const char *notoc
= rs6000_pcrel_p (cfun
) ? "_NOTOC" : "";
13568 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
13569 && flag_pic
== 2 ? "+32768" : "");
13573 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13574 tls
, rel64
, notoc
, funop
, addend
);
13575 s
+= sprintf (s
, "crset 2\n\t");
13578 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13579 tls
, rel64
, notoc
, funop
, addend
);
13581 else if (!speculate
)
13582 s
+= sprintf (s
, "crset 2\n\t");
13584 if (rs6000_pcrel_p (cfun
))
13587 sprintf (s
, "b%%T%ul", funop
);
13589 sprintf (s
, "beq%%T%ul-", funop
);
13591 else if (DEFAULT_ABI
== ABI_AIX
)
13597 funop
, ptrload
, funop
+ 3);
13602 funop
, ptrload
, funop
+ 3);
13604 else if (DEFAULT_ABI
== ABI_ELFv2
)
13610 funop
, ptrload
, funop
+ 2);
13615 funop
, ptrload
, funop
+ 2);
13622 funop
, sibcall
? "" : "l");
13626 funop
, sibcall
? "" : "l", sibcall
? "\n\tb $" : "");
13632 rs6000_indirect_call_template (rtx
*operands
, unsigned int funop
)
13634 return rs6000_indirect_call_template_1 (operands
, funop
, false);
13638 rs6000_indirect_sibcall_template (rtx
*operands
, unsigned int funop
)
13640 return rs6000_indirect_call_template_1 (operands
, funop
, true);
13644 /* Output indirect call insns. WHICH identifies the type of sequence. */
13646 rs6000_pltseq_template (rtx
*operands
, int which
)
13648 const char *rel64
= TARGET_64BIT
? "64" : "";
13651 if (GET_CODE (operands
[3]) == UNSPEC
)
13653 char off
= which
== RS6000_PLTSEQ_PLT_PCREL34
? '8' : '4';
13654 if (XINT (operands
[3], 1) == UNSPEC_TLSGD
)
13655 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13657 else if (XINT (operands
[3], 1) == UNSPEC_TLSLD
)
13658 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13662 gcc_assert (DEFAULT_ABI
== ABI_ELFv2
|| DEFAULT_ABI
== ABI_V4
);
13663 static char str
[96]; /* 10 spare */
13664 char off
= WORDS_BIG_ENDIAN
? '2' : '4';
13665 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
13666 && flag_pic
== 2 ? "+32768" : "");
13669 case RS6000_PLTSEQ_TOCSAVE
:
13672 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13673 TARGET_64BIT
? "d 2,24(1)" : "w 2,12(1)",
13676 case RS6000_PLTSEQ_PLT16_HA
:
13677 if (DEFAULT_ABI
== ABI_V4
&& !flag_pic
)
13680 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13684 "addis %%0,%%1,0\n\t"
13685 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13686 tls
, off
, rel64
, addend
);
13688 case RS6000_PLTSEQ_PLT16_LO
:
13690 "l%s %%0,0(%%1)\n\t"
13691 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13692 TARGET_64BIT
? "d" : "wz",
13693 tls
, off
, rel64
, TARGET_64BIT
? "_DS" : "", addend
);
13695 case RS6000_PLTSEQ_MTCTR
:
13698 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13699 tls
, rel64
, addend
);
13701 case RS6000_PLTSEQ_PLT_PCREL34
:
13703 "pl%s %%0,0(0),1\n\t"
13704 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13705 TARGET_64BIT
? "d" : "wz",
13709 gcc_unreachable ();
13715 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13716 /* Emit an assembler directive to set symbol visibility for DECL to
13717 VISIBILITY_TYPE. */
13720 rs6000_assemble_visibility (tree decl
, int vis
)
13725 /* Functions need to have their entry point symbol visibility set as
13726 well as their descriptor symbol visibility. */
13727 if (DEFAULT_ABI
== ABI_AIX
13729 && TREE_CODE (decl
) == FUNCTION_DECL
)
13731 static const char * const visibility_types
[] = {
13732 NULL
, "protected", "hidden", "internal"
13735 const char *name
, *type
;
13737 name
= ((* targetm
.strip_name_encoding
)
13738 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
))));
13739 type
= visibility_types
[vis
];
13741 fprintf (asm_out_file
, "\t.%s\t%s\n", type
, name
);
13742 fprintf (asm_out_file
, "\t.%s\t.%s\n", type
, name
);
13745 default_assemble_visibility (decl
, vis
);
13750 rs6000_reverse_condition (machine_mode mode
, enum rtx_code code
)
13752 /* Reversal of FP compares takes care -- an ordered compare
13753 becomes an unordered compare and vice versa. */
13754 if (mode
== CCFPmode
13755 && (!flag_finite_math_only
13756 || code
== UNLT
|| code
== UNLE
|| code
== UNGT
|| code
== UNGE
13757 || code
== UNEQ
|| code
== LTGT
))
13758 return reverse_condition_maybe_unordered (code
);
13760 return reverse_condition (code
);
13763 /* Generate a compare for CODE. Return a brand-new rtx that
13764 represents the result of the compare. */
13767 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
13769 machine_mode comp_mode
;
13770 rtx compare_result
;
13771 enum rtx_code code
= GET_CODE (cmp
);
13772 rtx op0
= XEXP (cmp
, 0);
13773 rtx op1
= XEXP (cmp
, 1);
13775 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
13776 comp_mode
= CCmode
;
13777 else if (FLOAT_MODE_P (mode
))
13778 comp_mode
= CCFPmode
;
13779 else if (code
== GTU
|| code
== LTU
13780 || code
== GEU
|| code
== LEU
)
13781 comp_mode
= CCUNSmode
;
13782 else if ((code
== EQ
|| code
== NE
)
13783 && unsigned_reg_p (op0
)
13784 && (unsigned_reg_p (op1
)
13785 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
13786 /* These are unsigned values, perhaps there will be a later
13787 ordering compare that can be shared with this one. */
13788 comp_mode
= CCUNSmode
;
13790 comp_mode
= CCmode
;
13792 /* If we have an unsigned compare, make sure we don't have a signed value as
13794 if (comp_mode
== CCUNSmode
&& CONST_INT_P (op1
)
13795 && INTVAL (op1
) < 0)
13797 op0
= copy_rtx_if_shared (op0
);
13798 op1
= force_reg (GET_MODE (op0
), op1
);
13799 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
13802 /* First, the compare. */
13803 compare_result
= gen_reg_rtx (comp_mode
);
13805 /* IEEE 128-bit support in VSX registers when we do not have hardware
13807 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
13809 rtx libfunc
= NULL_RTX
;
13810 bool check_nan
= false;
13817 libfunc
= optab_libfunc (eq_optab
, mode
);
13822 libfunc
= optab_libfunc (ge_optab
, mode
);
13827 libfunc
= optab_libfunc (le_optab
, mode
);
13832 libfunc
= optab_libfunc (unord_optab
, mode
);
13833 code
= (code
== UNORDERED
) ? NE
: EQ
;
13839 libfunc
= optab_libfunc (ge_optab
, mode
);
13840 code
= (code
== UNGE
) ? GE
: GT
;
13846 libfunc
= optab_libfunc (le_optab
, mode
);
13847 code
= (code
== UNLE
) ? LE
: LT
;
13853 libfunc
= optab_libfunc (eq_optab
, mode
);
13854 code
= (code
= UNEQ
) ? EQ
: NE
;
13858 gcc_unreachable ();
13861 gcc_assert (libfunc
);
13864 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
13865 SImode
, op0
, mode
, op1
, mode
);
13867 /* The library signals an exception for signalling NaNs, so we need to
13868 handle isgreater, etc. by first checking isordered. */
13871 rtx ne_rtx
, normal_dest
, unord_dest
;
13872 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
13873 rtx join_label
= gen_label_rtx ();
13874 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
13875 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
13878 /* Test for either value being a NaN. */
13879 gcc_assert (unord_func
);
13880 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
13881 SImode
, op0
, mode
, op1
, mode
);
13883 /* Set value (0) if either value is a NaN, and jump to the join
13885 dest
= gen_reg_rtx (SImode
);
13886 emit_move_insn (dest
, const1_rtx
);
13887 emit_insn (gen_rtx_SET (unord_cmp
,
13888 gen_rtx_COMPARE (comp_mode
, unord_dest
,
13891 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
13892 emit_jump_insn (gen_rtx_SET (pc_rtx
,
13893 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
13897 /* Do the normal comparison, knowing that the values are not
13899 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
13900 SImode
, op0
, mode
, op1
, mode
);
13902 emit_insn (gen_cstoresi4 (dest
,
13903 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
13905 normal_dest
, const0_rtx
));
13907 /* Join NaN and non-Nan paths. Compare dest against 0. */
13908 emit_label (join_label
);
13912 emit_insn (gen_rtx_SET (compare_result
,
13913 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
13918 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
13919 CLOBBERs to match cmptf_internal2 pattern. */
13920 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
13921 && FLOAT128_IBM_P (GET_MODE (op0
))
13922 && TARGET_HARD_FLOAT
)
13923 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
13925 gen_rtx_SET (compare_result
,
13926 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
13927 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
13928 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
13929 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
13930 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
13931 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
13932 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
13933 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
13934 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
13935 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
13936 else if (GET_CODE (op1
) == UNSPEC
13937 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
13939 rtx op1b
= XVECEXP (op1
, 0, 0);
13940 comp_mode
= CCEQmode
;
13941 compare_result
= gen_reg_rtx (CCEQmode
);
13943 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
13945 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
13948 emit_insn (gen_rtx_SET (compare_result
,
13949 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
13952 validate_condition_mode (code
, GET_MODE (compare_result
));
13954 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
13958 /* Return the diagnostic message string if the binary operation OP is
13959 not permitted on TYPE1 and TYPE2, NULL otherwise. */
13962 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED
,
13966 machine_mode mode1
= TYPE_MODE (type1
);
13967 machine_mode mode2
= TYPE_MODE (type2
);
13969 /* For complex modes, use the inner type. */
13970 if (COMPLEX_MODE_P (mode1
))
13971 mode1
= GET_MODE_INNER (mode1
);
13973 if (COMPLEX_MODE_P (mode2
))
13974 mode2
= GET_MODE_INNER (mode2
);
13976 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
13977 double to intermix unless -mfloat128-convert. */
13978 if (mode1
== mode2
)
13981 if (!TARGET_FLOAT128_CVT
)
13983 if ((mode1
== KFmode
&& mode2
== IFmode
)
13984 || (mode1
== IFmode
&& mode2
== KFmode
))
13985 return N_("__float128 and __ibm128 cannot be used in the same "
13988 if (TARGET_IEEEQUAD
13989 && ((mode1
== IFmode
&& mode2
== TFmode
)
13990 || (mode1
== TFmode
&& mode2
== IFmode
)))
13991 return N_("__ibm128 and long double cannot be used in the same "
13994 if (!TARGET_IEEEQUAD
13995 && ((mode1
== KFmode
&& mode2
== TFmode
)
13996 || (mode1
== TFmode
&& mode2
== KFmode
)))
13997 return N_("__float128 and long double cannot be used in the same "
14005 /* Expand floating point conversion to/from __float128 and __ibm128. */
14008 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
14010 machine_mode dest_mode
= GET_MODE (dest
);
14011 machine_mode src_mode
= GET_MODE (src
);
14012 convert_optab cvt
= unknown_optab
;
14013 bool do_move
= false;
14014 rtx libfunc
= NULL_RTX
;
14016 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
14017 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
14021 rtx_2func_t from_df
;
14022 rtx_2func_t from_sf
;
14023 rtx_2func_t from_si_sign
;
14024 rtx_2func_t from_si_uns
;
14025 rtx_2func_t from_di_sign
;
14026 rtx_2func_t from_di_uns
;
14029 rtx_2func_t to_si_sign
;
14030 rtx_2func_t to_si_uns
;
14031 rtx_2func_t to_di_sign
;
14032 rtx_2func_t to_di_uns
;
14033 } hw_conversions
[2] = {
14034 /* convertions to/from KFmode */
14036 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
14037 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
14038 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
14039 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
14040 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
14041 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
14042 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
14043 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
14044 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
14045 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
14046 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
14047 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
14050 /* convertions to/from TFmode */
14052 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
14053 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
14054 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
14055 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
14056 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
14057 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
14058 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
14059 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
14060 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
14061 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
14062 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
14063 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
14067 if (dest_mode
== src_mode
)
14068 gcc_unreachable ();
14070 /* Eliminate memory operations. */
14072 src
= force_reg (src_mode
, src
);
14076 rtx tmp
= gen_reg_rtx (dest_mode
);
14077 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
14078 rs6000_emit_move (dest
, tmp
, dest_mode
);
14082 /* Convert to IEEE 128-bit floating point. */
14083 if (FLOAT128_IEEE_P (dest_mode
))
14085 if (dest_mode
== KFmode
)
14087 else if (dest_mode
== TFmode
)
14090 gcc_unreachable ();
14096 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
14101 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
14107 if (FLOAT128_IBM_P (src_mode
))
14116 cvt
= ufloat_optab
;
14117 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
14121 cvt
= sfloat_optab
;
14122 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
14129 cvt
= ufloat_optab
;
14130 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
14134 cvt
= sfloat_optab
;
14135 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
14140 gcc_unreachable ();
14144 /* Convert from IEEE 128-bit floating point. */
14145 else if (FLOAT128_IEEE_P (src_mode
))
14147 if (src_mode
== KFmode
)
14149 else if (src_mode
== TFmode
)
14152 gcc_unreachable ();
14158 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
14163 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
14169 if (FLOAT128_IBM_P (dest_mode
))
14179 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
14184 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
14192 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
14197 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
14202 gcc_unreachable ();
14206 /* Both IBM format. */
14207 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
14211 gcc_unreachable ();
14213 /* Handle conversion between TFmode/KFmode/IFmode. */
14215 emit_insn (gen_rtx_SET (dest
, gen_rtx_FLOAT_EXTEND (dest_mode
, src
)));
14217 /* Handle conversion if we have hardware support. */
14218 else if (TARGET_FLOAT128_HW
&& hw_convert
)
14219 emit_insn ((hw_convert
) (dest
, src
));
14221 /* Call an external function to do the conversion. */
14222 else if (cvt
!= unknown_optab
)
14224 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
14225 gcc_assert (libfunc
!= NULL_RTX
);
14227 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
,
14230 gcc_assert (dest2
!= NULL_RTX
);
14231 if (!rtx_equal_p (dest
, dest2
))
14232 emit_move_insn (dest
, dest2
);
14236 gcc_unreachable ();
14242 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14243 can be used as that dest register. Return the dest register. */
14246 rs6000_emit_eqne (machine_mode mode
, rtx op1
, rtx op2
, rtx scratch
)
14248 if (op2
== const0_rtx
)
14251 if (GET_CODE (scratch
) == SCRATCH
)
14252 scratch
= gen_reg_rtx (mode
);
14254 if (logical_operand (op2
, mode
))
14255 emit_insn (gen_rtx_SET (scratch
, gen_rtx_XOR (mode
, op1
, op2
)));
14257 emit_insn (gen_rtx_SET (scratch
,
14258 gen_rtx_PLUS (mode
, op1
, negate_rtx (mode
, op2
))));
14263 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14264 requires this. The result is mode MODE. */
14266 rs6000_emit_fp_cror (rtx_code code
, machine_mode mode
, rtx x
)
14270 if (code
== LTGT
|| code
== LE
|| code
== UNLT
)
14271 cond
[n
++] = gen_rtx_fmt_ee (LT
, mode
, x
, const0_rtx
);
14272 if (code
== LTGT
|| code
== GE
|| code
== UNGT
)
14273 cond
[n
++] = gen_rtx_fmt_ee (GT
, mode
, x
, const0_rtx
);
14274 if (code
== LE
|| code
== GE
|| code
== UNEQ
)
14275 cond
[n
++] = gen_rtx_fmt_ee (EQ
, mode
, x
, const0_rtx
);
14276 if (code
== UNLT
|| code
== UNGT
|| code
== UNEQ
)
14277 cond
[n
++] = gen_rtx_fmt_ee (UNORDERED
, mode
, x
, const0_rtx
);
14279 gcc_assert (n
== 2);
14281 rtx cc
= gen_reg_rtx (CCEQmode
);
14282 rtx logical
= gen_rtx_IOR (mode
, cond
[0], cond
[1]);
14283 emit_insn (gen_cceq_ior_compare (mode
, cc
, logical
, cond
[0], x
, cond
[1], x
));
14289 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
14291 rtx condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
14292 rtx_code cond_code
= GET_CODE (condition_rtx
);
14294 if (FLOAT_MODE_P (mode
) && HONOR_NANS (mode
)
14295 && !(FLOAT128_VECTOR_P (mode
) && !TARGET_FLOAT128_HW
))
14297 else if (cond_code
== NE
14298 || cond_code
== GE
|| cond_code
== LE
14299 || cond_code
== GEU
|| cond_code
== LEU
14300 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
14302 rtx not_result
= gen_reg_rtx (CCEQmode
);
14303 rtx not_op
, rev_cond_rtx
;
14304 machine_mode cc_mode
;
14306 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
14308 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
14309 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
14310 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
14311 emit_insn (gen_rtx_SET (not_result
, not_op
));
14312 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
14315 machine_mode op_mode
= GET_MODE (XEXP (operands
[1], 0));
14316 if (op_mode
== VOIDmode
)
14317 op_mode
= GET_MODE (XEXP (operands
[1], 1));
14319 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
14321 PUT_MODE (condition_rtx
, DImode
);
14322 convert_move (operands
[0], condition_rtx
, 0);
14326 PUT_MODE (condition_rtx
, SImode
);
14327 emit_insn (gen_rtx_SET (operands
[0], condition_rtx
));
14331 /* Emit a branch of kind CODE to location LOC. */
14334 rs6000_emit_cbranch (machine_mode mode
, rtx operands
[])
14336 rtx condition_rtx
= rs6000_generate_compare (operands
[0], mode
);
14337 rtx loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
14338 rtx ite
= gen_rtx_IF_THEN_ELSE (VOIDmode
, condition_rtx
, loc_ref
, pc_rtx
);
14339 emit_jump_insn (gen_rtx_SET (pc_rtx
, ite
));
14342 /* Return the string to output a conditional branch to LABEL, which is
14343 the operand template of the label, or NULL if the branch is really a
14344 conditional return.
14346 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14347 condition code register and its mode specifies what kind of
14348 comparison we made.
14350 REVERSED is nonzero if we should reverse the sense of the comparison.
14352 INSN is the insn. */
14355 output_cbranch (rtx op
, const char *label
, int reversed
, rtx_insn
*insn
)
14357 static char string
[64];
14358 enum rtx_code code
= GET_CODE (op
);
14359 rtx cc_reg
= XEXP (op
, 0);
14360 machine_mode mode
= GET_MODE (cc_reg
);
14361 int cc_regno
= REGNO (cc_reg
) - CR0_REGNO
;
14362 int need_longbranch
= label
!= NULL
&& get_attr_length (insn
) == 8;
14363 int really_reversed
= reversed
^ need_longbranch
;
14369 validate_condition_mode (code
, mode
);
14371 /* Work out which way this really branches. We could use
14372 reverse_condition_maybe_unordered here always but this
14373 makes the resulting assembler clearer. */
14374 if (really_reversed
)
14376 /* Reversal of FP compares takes care -- an ordered compare
14377 becomes an unordered compare and vice versa. */
14378 if (mode
== CCFPmode
)
14379 code
= reverse_condition_maybe_unordered (code
);
14381 code
= reverse_condition (code
);
14386 /* Not all of these are actually distinct opcodes, but
14387 we distinguish them for clarity of the resulting assembler. */
14388 case NE
: case LTGT
:
14389 ccode
= "ne"; break;
14390 case EQ
: case UNEQ
:
14391 ccode
= "eq"; break;
14393 ccode
= "ge"; break;
14394 case GT
: case GTU
: case UNGT
:
14395 ccode
= "gt"; break;
14397 ccode
= "le"; break;
14398 case LT
: case LTU
: case UNLT
:
14399 ccode
= "lt"; break;
14400 case UNORDERED
: ccode
= "un"; break;
14401 case ORDERED
: ccode
= "nu"; break;
14402 case UNGE
: ccode
= "nl"; break;
14403 case UNLE
: ccode
= "ng"; break;
14405 gcc_unreachable ();
14408 /* Maybe we have a guess as to how likely the branch is. */
14410 note
= find_reg_note (insn
, REG_BR_PROB
, NULL_RTX
);
14411 if (note
!= NULL_RTX
)
14413 /* PROB is the difference from 50%. */
14414 int prob
= profile_probability::from_reg_br_prob_note (XINT (note
, 0))
14415 .to_reg_br_prob_base () - REG_BR_PROB_BASE
/ 2;
14417 /* Only hint for highly probable/improbable branches on newer cpus when
14418 we have real profile data, as static prediction overrides processor
14419 dynamic prediction. For older cpus we may as well always hint, but
14420 assume not taken for branches that are very close to 50% as a
14421 mispredicted taken branch is more expensive than a
14422 mispredicted not-taken branch. */
14423 if (rs6000_always_hint
14424 || (abs (prob
) > REG_BR_PROB_BASE
/ 100 * 48
14425 && (profile_status_for_fn (cfun
) != PROFILE_GUESSED
)
14426 && br_prob_note_reliable_p (note
)))
14428 if (abs (prob
) > REG_BR_PROB_BASE
/ 20
14429 && ((prob
> 0) ^ need_longbranch
))
14437 s
+= sprintf (s
, "b%slr%s ", ccode
, pred
);
14439 s
+= sprintf (s
, "b%s%s ", ccode
, pred
);
14441 /* We need to escape any '%' characters in the reg_names string.
14442 Assume they'd only be the first character.... */
14443 if (reg_names
[cc_regno
+ CR0_REGNO
][0] == '%')
14445 s
+= sprintf (s
, "%s", reg_names
[cc_regno
+ CR0_REGNO
]);
14449 /* If the branch distance was too far, we may have to use an
14450 unconditional branch to go the distance. */
14451 if (need_longbranch
)
14452 s
+= sprintf (s
, ",$+8\n\tb %s", label
);
14454 s
+= sprintf (s
, ",%s", label
);
14460 /* Return insn for VSX or Altivec comparisons. */
14463 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
14466 machine_mode mode
= GET_MODE (op0
);
14474 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
14485 mask
= gen_reg_rtx (mode
);
14486 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
14493 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14494 DMODE is expected destination mode. This is a recursive function. */
14497 rs6000_emit_vector_compare (enum rtx_code rcode
,
14499 machine_mode dmode
)
14502 bool swap_operands
= false;
14503 bool try_again
= false;
14505 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
14506 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
14508 /* See if the comparison works as is. */
14509 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
14517 swap_operands
= true;
14522 swap_operands
= true;
14530 /* Invert condition and try again.
14531 e.g., A != B becomes ~(A==B). */
14533 enum rtx_code rev_code
;
14534 enum insn_code nor_code
;
14537 rev_code
= reverse_condition_maybe_unordered (rcode
);
14538 if (rev_code
== UNKNOWN
)
14541 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
14542 if (nor_code
== CODE_FOR_nothing
)
14545 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
14549 mask
= gen_reg_rtx (dmode
);
14550 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
14558 /* Try GT/GTU/LT/LTU OR EQ */
14561 enum insn_code ior_code
;
14562 enum rtx_code new_code
;
14583 gcc_unreachable ();
14586 ior_code
= optab_handler (ior_optab
, dmode
);
14587 if (ior_code
== CODE_FOR_nothing
)
14590 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
14594 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
14598 mask
= gen_reg_rtx (dmode
);
14599 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
14610 std::swap (op0
, op1
);
14612 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
14617 /* You only get two chances. */
14621 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14622 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14623 operands for the relation operation COND. */
14626 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
14627 rtx cond
, rtx cc_op0
, rtx cc_op1
)
14629 machine_mode dest_mode
= GET_MODE (dest
);
14630 machine_mode mask_mode
= GET_MODE (cc_op0
);
14631 enum rtx_code rcode
= GET_CODE (cond
);
14632 machine_mode cc_mode
= CCmode
;
14635 bool invert_move
= false;
14637 if (VECTOR_UNIT_NONE_P (dest_mode
))
14640 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
14641 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
14645 /* Swap operands if we can, and fall back to doing the operation as
14646 specified, and doing a NOR to invert the test. */
14652 /* Invert condition and try again.
14653 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14654 invert_move
= true;
14655 rcode
= reverse_condition_maybe_unordered (rcode
);
14656 if (rcode
== UNKNOWN
)
14662 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
14664 /* Invert condition to avoid compound test. */
14665 invert_move
= true;
14666 rcode
= reverse_condition (rcode
);
14674 /* Mark unsigned tests with CCUNSmode. */
14675 cc_mode
= CCUNSmode
;
14677 /* Invert condition to avoid compound test if necessary. */
14678 if (rcode
== GEU
|| rcode
== LEU
)
14680 invert_move
= true;
14681 rcode
= reverse_condition (rcode
);
14689 /* Get the vector mask for the given relational operations. */
14690 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
14696 std::swap (op_true
, op_false
);
14698 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
14699 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
14700 && (GET_CODE (op_true
) == CONST_VECTOR
14701 || GET_CODE (op_false
) == CONST_VECTOR
))
14703 rtx constant_0
= CONST0_RTX (dest_mode
);
14704 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
14706 if (op_true
== constant_m1
&& op_false
== constant_0
)
14708 emit_move_insn (dest
, mask
);
14712 else if (op_true
== constant_0
&& op_false
== constant_m1
)
14714 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
14718 /* If we can't use the vector comparison directly, perhaps we can use
14719 the mask for the true or false fields, instead of loading up a
14721 if (op_true
== constant_m1
)
14724 if (op_false
== constant_0
)
14728 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
14729 op_true
= force_reg (dest_mode
, op_true
);
14731 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
14732 op_false
= force_reg (dest_mode
, op_false
);
14734 cond2
= gen_rtx_fmt_ee (NE
, cc_mode
, gen_lowpart (dest_mode
, mask
),
14735 CONST0_RTX (dest_mode
));
14736 emit_insn (gen_rtx_SET (dest
,
14737 gen_rtx_IF_THEN_ELSE (dest_mode
,
14744 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
14745 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
14746 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
14747 hardware has no such operation. */
14750 rs6000_emit_p9_fp_minmax (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
14752 enum rtx_code code
= GET_CODE (op
);
14753 rtx op0
= XEXP (op
, 0);
14754 rtx op1
= XEXP (op
, 1);
14755 machine_mode compare_mode
= GET_MODE (op0
);
14756 machine_mode result_mode
= GET_MODE (dest
);
14757 bool max_p
= false;
14759 if (result_mode
!= compare_mode
)
14762 if (code
== GE
|| code
== GT
)
14764 else if (code
== LE
|| code
== LT
)
14769 if (rtx_equal_p (op0
, true_cond
) && rtx_equal_p (op1
, false_cond
))
14772 else if (rtx_equal_p (op1
, true_cond
) && rtx_equal_p (op0
, false_cond
))
14778 rs6000_emit_minmax (dest
, max_p
? SMAX
: SMIN
, op0
, op1
);
14782 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14783 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
14784 operands of the last comparison is nonzero/true, FALSE_COND if it is
14785 zero/false. Return 0 if the hardware has no such operation. */
14788 rs6000_emit_p9_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
14790 enum rtx_code code
= GET_CODE (op
);
14791 rtx op0
= XEXP (op
, 0);
14792 rtx op1
= XEXP (op
, 1);
14793 machine_mode result_mode
= GET_MODE (dest
);
14798 if (!can_create_pseudo_p ())
14811 code
= swap_condition (code
);
14812 std::swap (op0
, op1
);
14819 /* Generate: [(parallel [(set (dest)
14820 (if_then_else (op (cmp1) (cmp2))
14823 (clobber (scratch))])]. */
14825 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
14826 cmove_rtx
= gen_rtx_SET (dest
,
14827 gen_rtx_IF_THEN_ELSE (result_mode
,
14832 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
14833 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
14834 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
14839 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
14840 operands of the last comparison is nonzero/true, FALSE_COND if it
14841 is zero/false. Return 0 if the hardware has no such operation. */
14844 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
14846 enum rtx_code code
= GET_CODE (op
);
14847 rtx op0
= XEXP (op
, 0);
14848 rtx op1
= XEXP (op
, 1);
14849 machine_mode compare_mode
= GET_MODE (op0
);
14850 machine_mode result_mode
= GET_MODE (dest
);
14852 bool is_against_zero
;
14854 /* These modes should always match. */
14855 if (GET_MODE (op1
) != compare_mode
14856 /* In the isel case however, we can use a compare immediate, so
14857 op1 may be a small constant. */
14858 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
14860 if (GET_MODE (true_cond
) != result_mode
)
14862 if (GET_MODE (false_cond
) != result_mode
)
14865 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14866 if (TARGET_P9_MINMAX
14867 && (compare_mode
== SFmode
|| compare_mode
== DFmode
)
14868 && (result_mode
== SFmode
|| result_mode
== DFmode
))
14870 if (rs6000_emit_p9_fp_minmax (dest
, op
, true_cond
, false_cond
))
14873 if (rs6000_emit_p9_fp_cmove (dest
, op
, true_cond
, false_cond
))
14877 /* Don't allow using floating point comparisons for integer results for
14879 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
14882 /* First, work out if the hardware can do this at all, or
14883 if it's too slow.... */
14884 if (!FLOAT_MODE_P (compare_mode
))
14887 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
14891 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
14893 /* A floating-point subtract might overflow, underflow, or produce
14894 an inexact result, thus changing the floating-point flags, so it
14895 can't be generated if we care about that. It's safe if one side
14896 of the construct is zero, since then no subtract will be
14898 if (SCALAR_FLOAT_MODE_P (compare_mode
)
14899 && flag_trapping_math
&& ! is_against_zero
)
14902 /* Eliminate half of the comparisons by switching operands, this
14903 makes the remaining code simpler. */
14904 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
14905 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
14907 code
= reverse_condition_maybe_unordered (code
);
14909 true_cond
= false_cond
;
14913 /* UNEQ and LTGT take four instructions for a comparison with zero,
14914 it'll probably be faster to use a branch here too. */
14915 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
14918 /* We're going to try to implement comparisons by performing
14919 a subtract, then comparing against zero. Unfortunately,
14920 Inf - Inf is NaN which is not zero, and so if we don't
14921 know that the operand is finite and the comparison
14922 would treat EQ different to UNORDERED, we can't do it. */
14923 if (HONOR_INFINITIES (compare_mode
)
14924 && code
!= GT
&& code
!= UNGE
14925 && (!CONST_DOUBLE_P (op1
)
14926 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
14927 /* Constructs of the form (a OP b ? a : b) are safe. */
14928 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
14929 || (! rtx_equal_p (op0
, true_cond
)
14930 && ! rtx_equal_p (op1
, true_cond
))))
14933 /* At this point we know we can use fsel. */
14935 /* Don't allow compare_mode other than SFmode or DFmode, for others there
14936 is no fsel instruction. */
14937 if (compare_mode
!= SFmode
&& compare_mode
!= DFmode
)
14940 /* Reduce the comparison to a comparison against zero. */
14941 if (! is_against_zero
)
14943 temp
= gen_reg_rtx (compare_mode
);
14944 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
14946 op1
= CONST0_RTX (compare_mode
);
14949 /* If we don't care about NaNs we can reduce some of the comparisons
14950 down to faster ones. */
14951 if (! HONOR_NANS (compare_mode
))
14957 true_cond
= false_cond
;
14970 /* Now, reduce everything down to a GE. */
14977 temp
= gen_reg_rtx (compare_mode
);
14978 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
14983 temp
= gen_reg_rtx (compare_mode
);
14984 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
14989 temp
= gen_reg_rtx (compare_mode
);
14990 emit_insn (gen_rtx_SET (temp
,
14991 gen_rtx_NEG (compare_mode
,
14992 gen_rtx_ABS (compare_mode
, op0
))));
14997 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
14998 temp
= gen_reg_rtx (result_mode
);
14999 emit_insn (gen_rtx_SET (temp
,
15000 gen_rtx_IF_THEN_ELSE (result_mode
,
15001 gen_rtx_GE (VOIDmode
,
15003 true_cond
, false_cond
)));
15004 false_cond
= true_cond
;
15007 temp
= gen_reg_rtx (compare_mode
);
15008 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
15013 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15014 temp
= gen_reg_rtx (result_mode
);
15015 emit_insn (gen_rtx_SET (temp
,
15016 gen_rtx_IF_THEN_ELSE (result_mode
,
15017 gen_rtx_GE (VOIDmode
,
15019 true_cond
, false_cond
)));
15020 true_cond
= false_cond
;
15023 temp
= gen_reg_rtx (compare_mode
);
15024 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
15029 gcc_unreachable ();
15032 emit_insn (gen_rtx_SET (dest
,
15033 gen_rtx_IF_THEN_ELSE (result_mode
,
15034 gen_rtx_GE (VOIDmode
,
15036 true_cond
, false_cond
)));
15040 /* Same as above, but for ints (isel). */
15043 rs6000_emit_int_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
15045 rtx condition_rtx
, cr
;
15046 machine_mode mode
= GET_MODE (dest
);
15047 enum rtx_code cond_code
;
15048 rtx (*isel_func
) (rtx
, rtx
, rtx
, rtx
, rtx
);
15051 if (mode
!= SImode
&& (!TARGET_POWERPC64
|| mode
!= DImode
))
15054 /* We still have to do the compare, because isel doesn't do a
15055 compare, it just looks at the CRx bits set by a previous compare
15057 condition_rtx
= rs6000_generate_compare (op
, mode
);
15058 cond_code
= GET_CODE (condition_rtx
);
15059 cr
= XEXP (condition_rtx
, 0);
15060 signedp
= GET_MODE (cr
) == CCmode
;
15062 isel_func
= (mode
== SImode
15063 ? (signedp
? gen_isel_signed_si
: gen_isel_unsigned_si
)
15064 : (signedp
? gen_isel_signed_di
: gen_isel_unsigned_di
));
15068 case LT
: case GT
: case LTU
: case GTU
: case EQ
:
15069 /* isel handles these directly. */
15073 /* We need to swap the sense of the comparison. */
15075 std::swap (false_cond
, true_cond
);
15076 PUT_CODE (condition_rtx
, reverse_condition (cond_code
));
15081 false_cond
= force_reg (mode
, false_cond
);
15082 if (true_cond
!= const0_rtx
)
15083 true_cond
= force_reg (mode
, true_cond
);
15085 emit_insn (isel_func (dest
, condition_rtx
, true_cond
, false_cond
, cr
));
15091 rs6000_emit_minmax (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
15093 machine_mode mode
= GET_MODE (op0
);
15097 /* VSX/altivec have direct min/max insns. */
15098 if ((code
== SMAX
|| code
== SMIN
)
15099 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode
)
15100 || (mode
== SFmode
&& VECTOR_UNIT_VSX_P (DFmode
))))
15102 emit_insn (gen_rtx_SET (dest
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
15106 if (code
== SMAX
|| code
== SMIN
)
15111 if (code
== SMAX
|| code
== UMAX
)
15112 target
= emit_conditional_move (dest
, c
, op0
, op1
, mode
,
15113 op0
, op1
, mode
, 0);
15115 target
= emit_conditional_move (dest
, c
, op0
, op1
, mode
,
15116 op1
, op0
, mode
, 0);
15117 gcc_assert (target
);
15118 if (target
!= dest
)
15119 emit_move_insn (dest
, target
);
15122 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15123 COND is true. Mark the jump as unlikely to be taken. */
15126 emit_unlikely_jump (rtx cond
, rtx label
)
15128 rtx x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, label
, pc_rtx
);
15129 rtx_insn
*insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, x
));
15130 add_reg_br_prob_note (insn
, profile_probability::very_unlikely ());
15133 /* A subroutine of the atomic operation splitters. Emit a load-locked
15134 instruction in MODE. For QI/HImode, possibly use a pattern than includes
15135 the zero_extend operation. */
15138 emit_load_locked (machine_mode mode
, rtx reg
, rtx mem
)
15140 rtx (*fn
) (rtx
, rtx
) = NULL
;
15145 fn
= gen_load_lockedqi
;
15148 fn
= gen_load_lockedhi
;
15151 if (GET_MODE (mem
) == QImode
)
15152 fn
= gen_load_lockedqi_si
;
15153 else if (GET_MODE (mem
) == HImode
)
15154 fn
= gen_load_lockedhi_si
;
15156 fn
= gen_load_lockedsi
;
15159 fn
= gen_load_lockeddi
;
15162 fn
= gen_load_lockedti
;
15165 gcc_unreachable ();
15167 emit_insn (fn (reg
, mem
));
15170 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15171 instruction in MODE. */
15174 emit_store_conditional (machine_mode mode
, rtx res
, rtx mem
, rtx val
)
15176 rtx (*fn
) (rtx
, rtx
, rtx
) = NULL
;
15181 fn
= gen_store_conditionalqi
;
15184 fn
= gen_store_conditionalhi
;
15187 fn
= gen_store_conditionalsi
;
15190 fn
= gen_store_conditionaldi
;
15193 fn
= gen_store_conditionalti
;
15196 gcc_unreachable ();
15199 /* Emit sync before stwcx. to address PPC405 Erratum. */
15200 if (PPC405_ERRATUM77
)
15201 emit_insn (gen_hwsync ());
15203 emit_insn (fn (res
, mem
, val
));
15206 /* Expand barriers before and after a load_locked/store_cond sequence. */
15209 rs6000_pre_atomic_barrier (rtx mem
, enum memmodel model
)
15211 rtx addr
= XEXP (mem
, 0);
15213 if (!legitimate_indirect_address_p (addr
, reload_completed
)
15214 && !legitimate_indexed_address_p (addr
, reload_completed
))
15216 addr
= force_reg (Pmode
, addr
);
15217 mem
= replace_equiv_address_nv (mem
, addr
);
15222 case MEMMODEL_RELAXED
:
15223 case MEMMODEL_CONSUME
:
15224 case MEMMODEL_ACQUIRE
:
15226 case MEMMODEL_RELEASE
:
15227 case MEMMODEL_ACQ_REL
:
15228 emit_insn (gen_lwsync ());
15230 case MEMMODEL_SEQ_CST
:
15231 emit_insn (gen_hwsync ());
15234 gcc_unreachable ();
15240 rs6000_post_atomic_barrier (enum memmodel model
)
15244 case MEMMODEL_RELAXED
:
15245 case MEMMODEL_CONSUME
:
15246 case MEMMODEL_RELEASE
:
15248 case MEMMODEL_ACQUIRE
:
15249 case MEMMODEL_ACQ_REL
:
15250 case MEMMODEL_SEQ_CST
:
15251 emit_insn (gen_isync ());
15254 gcc_unreachable ();
/* A subroutine of the various atomic expanders.  For sub-word operations,
   we must adjust things to operate on SImode.  Given the original MEM,
   return a new aligned memory.  Also build and return the quantities by
   which to shift and mask.  */

static rtx
rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
{
  rtx addr, align, shift, mask, mem;
  HOST_WIDE_INT shift_mask;
  machine_mode mode = GET_MODE (orig_mem);

  /* For smaller modes, we have to implement this via SImode.
     Maximum shift in bits: 24 for a byte, 16 for a halfword.  */
  shift_mask = (mode == QImode ? 0x18 : 0x10);

  addr = XEXP (orig_mem, 0);
  addr = force_reg (GET_MODE (addr), addr);

  /* Aligned memory containing subword.  Generate a new memory.  We
     do not want any of the existing MEM_ATTR data, as we're now
     accessing memory outside the original object.  */
  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  mem = gen_rtx_MEM (SImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);

  /* Shift amount for subword relative to aligned word.  Multiply the
     low address bits by 8 (shift left 3) to get a bit count.  */
  shift = gen_reg_rtx (SImode);
  addr = gen_lowpart (SImode, addr);
  rtx tmp = gen_reg_rtx (SImode);
  emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
  emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
  /* Big-endian addresses count subwords from the other end of the word,
     so invert the shift there.  */
  if (BYTES_BIG_ENDIAN)
    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
				 shift, 1, OPTAB_LIB_WIDEN);
  *pshift = shift;

  /* Mask for insertion: the subword's mode mask moved into position.  */
  mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
  *pmask = mask;

  return mem;
}
/* A subroutine of the various atomic expanders.  For sub-word operands,
   combine OLDVAL and NEWVAL via MASK.  Returns a new pseduo.
   Computes (OLDVAL & ~MASK) | NEWVAL; NEWVAL must already be shifted
   into position and hold zeros outside MASK.  */

static rtx
rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
{
  rtx x;

  x = gen_reg_rtx (SImode);
  emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
					  gen_rtx_NOT (SImode, mask),
					  oldval)));

  x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);

  return x;
}
/* A subroutine of the various atomic expanders.  For sub-word operands,
   extract WIDE to NARROW via SHIFT: shift the field down to bit 0 and
   move the low part into NARROW.  */

static void
rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
{
  wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
			      wide, 1, OPTAB_LIB_WIDEN);
  emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
}
/* Expand an atomic compare and swap operation.
   operands[0] = bool success output, [1] = old-value output, [2] = memory,
   [3] = expected value, [4] = desired value, [5] = weak flag,
   [6] = success memory model, [7] = failure memory model.  */

void
rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
  rtx boolval, retval, mem, oldval, newval, cond;
  rtx label1, label2, x, mask, shift;
  machine_mode mode, orig_mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;

  boolval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (INTVAL (operands[5]) != 0);
  mod_s = memmodel_base (INTVAL (operands[6]));
  mod_f = memmodel_base (INTVAL (operands[7]));
  orig_mode = mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (mode == QImode || mode == HImode)
    {
      /* Before power8, we didn't have access to lbarx/lharx, so generate a
	 lwarx and shift/mask operations.  With power8, we need to do the
	 comparison in SImode, but the store is still done in QI/HImode.  */
      oldval = convert_modes (SImode, mode, oldval, 1);

      if (!TARGET_SYNC_HI_QI)
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask OLDVAL into position with the word.  */
	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Shift and mask NEWVAL into position within the word.  */
	  newval = convert_modes (SImode, mode, newval, 1);
	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);
	}

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }
  else if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode != TImode && !reg_or_short_operand (oldval, mode))
    oldval = copy_to_mode_reg (mode, oldval);

  if (reg_overlap_mentioned_p (retval, newval))
    newval = copy_to_reg (newval);

  mem = rs6000_pre_atomic_barrier (mem, mod_s);

  /* A strong CAS retries the store-conditional in a loop (label1); a weak
     CAS is allowed to fail spuriously and needs no retry loop.  */
  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = retval;
  if (mask)
    x = expand_simple_binop (SImode, AND, retval, mask,
			     NULL_RTX, 1, OPTAB_LIB_WIDEN);

  cond = gen_reg_rtx (CCmode);
  /* If we have TImode, synthesize a comparison.  */
  if (mode != TImode)
    x = gen_rtx_COMPARE (CCmode, x, oldval);
  else
    {
      /* Compare the two halves with xor/xor/or and test the result
	 against zero.  */
      rtx xor1_result = gen_reg_rtx (DImode);
      rtx xor2_result = gen_reg_rtx (DImode);
      rtx or_result = gen_reg_rtx (DImode);
      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);

      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
    }

  emit_insn (gen_rtx_SET (cond, x));

  /* Mismatch: skip the store-conditional entirely.  */
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label2);

  x = newval;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, newval, mask);

  emit_store_conditional (orig_mode, cond, mem, x);

  if (!is_weak)
    {
      /* Reservation lost: retry the whole sequence.  */
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  /* For a non-relaxed failure model the failure path must also pass
     through the post barrier, so place label2 before it.  */
  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  rs6000_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  if (shift)
    rs6000_finish_atomic_subword (operands[1], retval, shift);
  else if (mode != GET_MODE (operands[1]))
    convert_move (operands[1], retval, 1);

  /* In all cases, CR0 contains EQ on success, and NE on failure.  */
  x = gen_rtx_EQ (SImode, cond, const0_rtx);
  emit_insn (gen_rtx_SET (boolval, x));
}
/* Expand an atomic exchange operation.
   operands[0] = old-value output, [1] = memory, [2] = new value,
   [3] = memory model.  */

void
rs6000_expand_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, cond;
  machine_mode mode;
  enum memmodel model;
  rtx label, x, mask, shift;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = memmodel_base (INTVAL (operands[3]));
  mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  /* Without lbarx/lharx, sub-word exchange is synthesized from an SImode
     lwarx/stwcx. on the enclosing aligned word.  */
  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
    {
      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

      /* Shift and mask VAL into position with the word.  */
      val = convert_modes (SImode, mode, val, 1);
      val = expand_simple_binop (SImode, ASHIFT, val, shift,
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);

  x = val;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, val, mask);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (mode, cond, mem, x);

  /* Retry if the reservation was lost.  */
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    rs6000_finish_atomic_subword (operands[0], retval, shift);
}
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before of after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */

void
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = memmodel_base (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode store_mode = mode;
  rtx label, x, cond, mask, shift;
  rtx before = orig_before, after = orig_after;

  mask = shift = NULL_RTX;
  /* On power8, we want to use SImode for the operation.  On previous systems,
     use the operation in a subword and shift/mask to get the proper byte or
     halfword.  */
  if (mode == QImode || mode == HImode)
    {
      if (TARGET_SYNC_HI_QI)
	{
	  val = convert_modes (SImode, mode, val, 1);

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  mode = SImode;
	}
      else
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask VAL into position with the word.  */
	  val = convert_modes (SImode, mode, val, 1);
	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  switch (code)
	    {
	    case IOR:
	    case XOR:
	      /* We've already zero-extended VAL.  That is sufficient to
		 make certain that it does not affect other bits.  */
	      break;

	    case AND:
	      /* If we make certain that all of the other bits in VAL are
		 set, that will be sufficient to not affect other bits.  */
	      x = gen_rtx_NOT (SImode, mask);
	      x = gen_rtx_IOR (SImode, x, val);
	      emit_insn (gen_rtx_SET (val, x));
	      mask = NULL;
	      break;

	    case NOT:
	    case PLUS:
	    case MINUS:
	      /* These will all affect bits outside the field and need
		 adjustment via MASK within the loop.  */
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  store_mode = mode = SImode;
	}
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      /* NOT here really means NAND: after = ~(before & val).  */
      x = expand_simple_binop (mode, AND, before, val,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    {
      after = expand_simple_binop (mode, code, before, val,
				   after, 1, OPTAB_LIB_WIDEN);
    }

  x = after;
  if (mask)
    {
      /* Keep the operation's effect inside the subword field and restore
	 the untouched bits of the enclosing word.  */
      x = expand_simple_binop (SImode, AND, after, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      x = rs6000_mask_atomic_subword (before, x, mask);
    }
  else if (store_mode != mode)
    x = convert_modes (store_mode, mode, x, 1);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (store_mode, cond, mem, x);

  /* Retry if the reservation was lost.  */
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    {
      /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
	 then do the calcuations in a SImode register.  */
      if (orig_before)
	rs6000_finish_atomic_subword (orig_before, before, shift);
      if (orig_after)
	rs6000_finish_atomic_subword (orig_after, after, shift);
    }
  else if (store_mode != mode)
    {
      /* QImode/HImode on machines with lbarx/lharx where we do the native
	 operation and then do the calcuations in a SImode register.  */
      if (orig_before)
	convert_move (orig_before, before, 1);
      if (orig_after)
	convert_move (orig_after, after, 1);
    }
  else if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}
/* Emit instructions to move SRC to DST.  Called by splitters for
   multi-register moves.  It will emit at most one instruction for
   each register that is accessed; that is, it won't emit li/lis pairs
   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
   register.  */

void
rs6000_split_multireg_move (rtx dst, rtx src)
{
  /* The register number of the first register being moved.  */
  int reg;
  /* The mode that is to be moved.  */
  machine_mode mode;
  /* The mode that the move is being done in, and its size.  */
  machine_mode reg_mode;
  int reg_mode_size;
  /* The number of registers that will be moved.  */
  int nregs;

  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
  mode = GET_MODE (dst);
  nregs = hard_regno_nregs (reg, mode);
  /* Pick the widest per-register mode the register class supports.  */
  if (FP_REGNO_P (reg))
    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
	(TARGET_HARD_FLOAT ? DFmode : SFmode);
  else if (ALTIVEC_REGNO_P (reg))
    reg_mode = V16QImode;
  else
    reg_mode = word_mode;
  reg_mode_size = GET_MODE_SIZE (reg_mode);

  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));

  /* TDmode residing in FP registers is special, since the ISA requires that
     the lower-numbered word of a register pair is always the most significant
     word, even in little-endian mode.  This does not match the usual subreg
     semantics, so we cannnot use simplify_gen_subreg in those cases.  Access
     the appropriate constituent registers "by hand" in little-endian mode.

     Note we do not need to check for destructive overlap here since TDmode
     can only reside in even/odd register pairs.  */
  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
    {
      rtx p_src, p_dst;
      int i;

      for (i = 0; i < nregs; i++)
	{
	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
	  else
	    p_src = simplify_gen_subreg (reg_mode, src, mode,
					 i * reg_mode_size);

	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
	  else
	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
					 i * reg_mode_size);

	  emit_insn (gen_rtx_SET (p_dst, p_src));
	}

      return;
    }

  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
    {
      /* Move register range backwards, if we might have destructive
	 overlap.  */
      int i;
      for (i = nregs - 1; i >= 0; i--)
	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
						     i * reg_mode_size),
				simplify_gen_subreg (reg_mode, src, mode,
						     i * reg_mode_size)));
    }
  else
    {
      int i;
      /* J indexes the first subword to move; it stays -1 unless the base
	 register of a load overlaps the destination, in which case that
	 subword must be moved last.  */
      int j = -1;
      bool used_update = false;
      rtx restore_basereg = NULL_RTX;

      if (MEM_P (src) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
	    {
	      /* Materialize the auto-increment up front, then address the
		 memory through the updated base register.  */
	      rtx delta_rtx;
	      breg = XEXP (XEXP (src, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      src = replace_equiv_address (src, breg);
	    }
	  else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
	    {
	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (src, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      /* Load the first word with a load-with-update insn,
			 which performs the PRE_MODIFY as a side effect.  */
		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
		      emit_insn (gen_rtx_SET (ndst,
					      gen_rtx_MEM (reg_mode,
							   XEXP (src, 0))));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (src, 0), 1)));
		  src = replace_equiv_address (src, basereg);
		}
	      else
		{
		  /* Compute the full address into the first destination
		     register and load through it.  */
		  rtx basereg = gen_rtx_REG (Pmode, reg);
		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
		  src = replace_equiv_address (src, basereg);
		}
	    }

	  breg = XEXP (src, 0);
	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
	    breg = XEXP (breg, 0);

	  /* If the base register we are using to address memory is
	     also a destination reg, then change that register last.  */
	  if (REG_P (breg)
	      && REGNO (breg) >= REGNO (dst)
	      && REGNO (breg) < REGNO (dst) + nregs)
	    j = REGNO (breg) - REGNO (dst);
	}
      else if (MEM_P (dst) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (dst, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));

	      /* We have to update the breg before doing the store.
		 Use store with update, if available.  */

	      if (TARGET_UPDATE)
		{
		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		  emit_insn (TARGET_32BIT
			     ? (TARGET_POWERPC64
				? gen_movdi_si_update (breg, breg, delta_rtx,
						       nsrc)
				: gen_movsi_si_update (breg, breg, delta_rtx,
						       nsrc))
			     : gen_movdi_di_update (breg, breg, delta_rtx,
						    nsrc));
		  used_update = true;
		}
	      else
		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      dst = replace_equiv_address (dst, breg);
	    }
	  else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    {
	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      /* Store the first word with store-with-update, which
			 performs the PRE_MODIFY as a side effect.  */
		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
							   XEXP (dst, 0)),
					      nsrc));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (dst, 0), 1)));
		  dst = replace_equiv_address (dst, basereg);
		}
	      else
		{
		  /* Fold a reg+reg address into the base register so the
		     per-word stores can use reg+offset; undo it afterwards
		     via RESTORE_BASEREG since we may not clobber the base.  */
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
			      && REG_P (basereg)
			      && REG_P (offsetreg)
			      && REGNO (basereg) != REGNO (offsetreg));
		  /* Register 0 reads as zero in an address base slot, so it
		     cannot serve as the base; swap the two.  */
		  if (REGNO (basereg) == 0)
		    {
		      rtx tmp = offsetreg;
		      offsetreg = basereg;
		      basereg = tmp;
		    }
		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
		  restore_basereg = gen_sub3_insn (basereg, basereg,
						   offsetreg);
		  dst = replace_equiv_address (dst, basereg);
		}
	    }
	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
	}

      for (i = 0; i < nregs; i++)
	{
	  /* Calculate index to next subword.  */
	  ++j;
	  if (j == nregs)
	    j = 0;

	  /* If compiler already emitted move of first word by
	     store with update, no need to do anything.  */
	  if (j == 0 && used_update)
	    continue;

	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
						       j * reg_mode_size),
				  simplify_gen_subreg (reg_mode, src, mode,
						       j * reg_mode_size)));
	}
      if (restore_basereg != NULL_RTX)
	emit_insn (restore_basereg);
    }
}
/* Lazily-created alias set used for all TOC references; -1 until the
   first call to get_TOC_alias_set.  GTY(()) so it survives GC.  */
static GTY(()) alias_set_type TOC_alias_set = -1;

/* Return the alias set for TOC references, creating it on first use.  */
alias_set_type
get_TOC_alias_set (void)
{
  if (TOC_alias_set == -1)
    TOC_alias_set = new_alias_set ();
  return TOC_alias_set;
}
/* The mode the ABI uses for a word.  This is not the same as word_mode
   for -m32 -mpowerpc64.  This is used to implement various target hooks.  */

static scalar_int_mode
rs6000_abi_word_mode (void)
{
  return TARGET_32BIT ? SImode : DImode;
}
/* Implement the TARGET_OFFLOAD_OPTIONS hook.  Return the ABI flag to pass
   to the offload compiler; caller owns the xstrdup'd string.  */
static char *
rs6000_offload_options (void)
{
  if (TARGET_64BIT)
    return xstrdup ("-foffload-abi=lp64");
  else
    return xstrdup ("-foffload-abi=ilp32");
}
15917 /* A quick summary of the various types of 'constant-pool tables'
15920 Target Flags Name One table per
15921 AIX (none) AIX TOC object file
15922 AIX -mfull-toc AIX TOC object file
15923 AIX -mminimal-toc AIX minimal TOC translation unit
15924 SVR4/EABI (none) SVR4 SDATA object file
15925 SVR4/EABI -fpic SVR4 pic object file
15926 SVR4/EABI -fPIC SVR4 PIC translation unit
15927 SVR4/EABI -mrelocatable EABI TOC function
15928 SVR4/EABI -maix AIX TOC object file
15929 SVR4/EABI -maix -mminimal-toc
15930 AIX minimal TOC translation unit
15932 Name Reg. Set by entries contains:
15933 made by addrs? fp? sum?
15935 AIX TOC 2 crt0 as Y option option
15936 AIX minimal TOC 30 prolog gcc Y Y option
15937 SVR4 SDATA 13 crt0 gcc N Y N
15938 SVR4 pic 30 prolog ld Y not yet N
15939 SVR4 PIC 30 prolog gcc Y option option
15940 EABI TOC 30 prolog gcc Y option option
/* Hash functions for the hash table.  */

/* Return a deterministic hash value for constant rtx K, recursing into
   sub-expressions.  Used to detect duplicate TOC entries.  */
static unsigned
rs6000_hash_constant (rtx k)
{
  enum rtx_code code = GET_CODE (k);
  machine_mode mode = GET_MODE (k);
  unsigned result = (code << 3) ^ mode;
  const char *format;
  int flen, fidx;

  format = GET_RTX_FORMAT (code);
  flen = strlen (format);
  fidx = 0;

  switch (code)
    {
    case LABEL_REF:
      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));

    case CONST_WIDE_INT:
      {
	int i;
	flen = CONST_WIDE_INT_NUNITS (k);
	for (i = 0; i < flen; i++)
	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
	return result;
      }

    case CONST_DOUBLE:
      return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;

    case CODE_LABEL:
      /* Skip over the non-significant leading fields of a CODE_LABEL.  */
      fidx = 3;
      break;

    default:
      break;
    }

  /* Fold each remaining operand into the hash according to its
     rtx-format letter.  */
  for (; fidx < flen; fidx++)
    switch (format[fidx])
      {
      case 's':
	{
	  unsigned i, len;
	  const char *str = XSTR (k, fidx);
	  len = strlen (str);
	  result = result * 613 + len;
	  for (i = 0; i < len; i++)
	    result = result * 613 + (unsigned) str[i];
	  break;
	}
      case 'u':
      case 'e':
	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
	break;
      case 'i':
      case 'n':
	result = result * 613 + (unsigned) XINT (k, fidx);
	break;
      case 'w':
	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
	  result = result * 613 + (unsigned) XWINT (k, fidx);
	else
	  {
	    /* HOST_WIDE_INT is wider than unsigned; hash it one
	       unsigned-sized chunk at a time.  */
	    size_t i;
	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
	      result = result * 613 + (unsigned) (XWINT (k, fidx)
						  >> CHAR_BIT * i);
	  }
	break;
      case '0':
	break;
      default:
	gcc_unreachable ();
      }

  return result;
}
/* Hash a TOC hash table entry: the constant's hash combined with the
   mode it is being placed in the TOC for.  */
hashval_t
toc_hasher::hash (toc_hash_struct *thc)
{
  return rs6000_hash_constant (thc->key) ^ thc->key_mode;
}
/* Compare H1 and H2 for equivalence.  Two TOC entries are equal when
   both the mode and the constant rtx match.  */
bool
toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
{
  rtx r1 = h1->key;
  rtx r2 = h2->key;

  if (h1->key_mode != h2->key_mode)
    return 0;

  return rtx_equal_p (r1, r2);
}
/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.  */

/* NOTE(review): the expansion deliberately tests the caller's local
   variable `name', not the macro argument NAME — every user passes a
   local named `name'.  Confirm before renaming either.  */
#define VTABLE_NAME_P(NAME)				\
  (strncmp ("_vt.", name, strlen ("_vt.")) == 0		\
  || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0	\
  || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0	\
  || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0	\
  || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  /* No '$' at all, or one in the very first position (which the loop
     below could not handle uniformly): return NAME unchanged.  */
  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  /* Replace each '$' (starting from the first one found) with '_'.  */
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
/* Output the symbol referenced by SYMBOL_REF X to FILE, using the bare
   identifier for vtable names (see comment below).  */
void
rs6000_output_symbol_ref (FILE *file, rtx x)
{
  const char *name = XSTR (x, 0);

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the identifier and not the
     symbol.  */
  if (VTABLE_NAME_P (name))
    RS6000_OUTPUT_BASENAME (file, name);
  else
    assemble_name (file, name);
}
/* Output a TOC entry.  We derive the entry name from what is being
   written.  X is the constant, LABELNO the internal label number, and
   MODE the mode the constant will be accessed in.  */

void
output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
{
  char buf[256];
  const char *name = buf;
  rtx base = x;
  HOST_WIDE_INT offset = 0;

  gcc_assert (!TARGET_NO_TOC_OR_PCREL);

  /* When the linker won't eliminate them, don't output duplicate
     TOC entries (this happens on AIX if there is any kind of TOC,
     and on SVR4 under -fPIC or -mrelocatable).  Don't do this for
     CODE_LABELs.  */
  if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
    {
      struct toc_hash_struct *h;

      /* Create toc_hash_table.  This can't be done at TARGET_OPTION_OVERRIDE
	 time because GGC is not initialized at that point.  */
      if (toc_hash_table == NULL)
	toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);

      h = ggc_alloc<toc_hash_struct> ();
      h->key = x;
      h->key_mode = mode;
      h->labelno = labelno;

      toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
      if (*found == NULL)
	*found = h;
      else  /* This is indeed a duplicate.
	       Set this label equal to that label.  */
	{
	  fputs ("\t.set ", file);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d,", labelno);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d\n", ((*found)->labelno));

#ifdef HAVE_AS_TLS
	  /* TLS global/local-dynamic entries also have an LCM companion
	     label; alias it to the duplicate's as well.  */
	  if (TARGET_XCOFF && SYMBOL_REF_P (x)
	      && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
		  || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
	    {
	      fputs ("\t.set ", file);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d,", labelno);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d\n", ((*found)->labelno));
	    }
#endif
	  return;
	}
    }

  /* If we're going to put a double constant in the TOC, make sure it's
     aligned properly when strict alignment is on.  */
  if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
      && STRICT_ALIGNMENT
      && GET_MODE_BITSIZE (mode) >= 64
      && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
    ASM_OUTPUT_ALIGN (file, 3);
  }

  (*targetm.asm_out.internal_label) (file, "LC", labelno);

  /* Handle FP constants specially.  Note that if we have a minimal
     TOC, things we put here aren't actually in the TOC, so we can allow
     FP constants.  */
  if (CONST_DOUBLE_P (x)
      && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
	  || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
    {
      long k[4];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff,
		   k[2] & 0xffffffff, k[3] & 0xffffffff);
	  return;
	}
    }
  else if (CONST_DOUBLE_P (x)
	   && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
    {
      long k[2];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff);
	  return;
	}
    }
  else if (CONST_DOUBLE_P (x)
	   && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
    {
      long l;

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
      else
	REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  if (WORDS_BIG_ENDIAN)
	    fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
	  else
	    fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
    }
  else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
    {
      unsigned HOST_WIDE_INT low;
      HOST_WIDE_INT high;

      low = INTVAL (x) & 0xffffffff;
      high = (HOST_WIDE_INT) INTVAL (x) >> 32;

      /* TOC entries are always Pmode-sized, so when big-endian
	 smaller integer constants in the TOC need to be padded.
	 (This is still a win over putting the constants in
	 a separate constant pool, because then we'd have
	 to have both a TOC entry _and_ the actual constant.)

	 For a 32-bit target, CONST_INT values are loaded and shifted
	 entirely within `low' and can be stored in one TOC entry.  */

      /* It would be easy to make this work, but it doesn't now.  */
      gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));

      if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
	{
	  low |= high << 32;
	  low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
	  high = (HOST_WIDE_INT) low >> 32;
	  low &= 0xffffffff;
	}

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc ID_%lx_%lx[TC],",
		     (long) high & 0xffffffff, (long) low & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   (long) high & 0xffffffff, (long) low & 0xffffffff);
	  return;
	}
      else
	{
	  if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc ID_%lx_%lx[TC],",
			 (long) high & 0xffffffff, (long) low & 0xffffffff);
	      fprintf (file, "0x%lx,0x%lx\n",
		       (long) high & 0xffffffff, (long) low & 0xffffffff);
	      return;
	    }
	  else
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
	      fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
	      return;
	    }
	}
    }

  /* Symbolic constant: split CONST (PLUS sym offset) into base + offset.  */
  if (GET_CODE (x) == CONST)
    {
      gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (x, 0), 1)));

      base = XEXP (XEXP (x, 0), 0);
      offset = INTVAL (XEXP (XEXP (x, 0), 1));
    }

  switch (GET_CODE (base))
    {
    case SYMBOL_REF:
      name = XSTR (base, 0);
      break;

    case LABEL_REF:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L",
				   CODE_LABEL_NUMBER (XEXP (base, 0)));
      break;

    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ELF || TARGET_MINIMAL_TOC)
    fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
  else
    {
      fputs ("\t.tc ", file);
      RS6000_OUTPUT_BASENAME (file, name);

      /* Encode the offset into the entry name: .N for negative,
	 .P for positive.  */
      if (offset < 0)
	fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
      else if (offset)
	fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);

      /* Mark large TOC symbols on AIX with [TE] so they are mapped
	 after other TOC symbols, reducing overflow of small TOC access
	 to [TC] symbols.  */
      fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
	     ? "[TE]," : "[TC],", file);
    }

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a TOC reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the symbol and not the
     section.  */
  if (VTABLE_NAME_P (name))
    {
      RS6000_OUTPUT_BASENAME (file, name);
      if (offset < 0)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
      else if (offset > 0)
	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
    }
  else
    output_addr_const (file, x);

#if HAVE_AS_TLS
  if (TARGET_XCOFF && SYMBOL_REF_P (base))
    {
      switch (SYMBOL_REF_TLS_MODEL (base))
	{
	case 0:
	  break;
	case TLS_MODEL_LOCAL_EXEC:
	  fputs ("@le", file);
	  break;
	case TLS_MODEL_INITIAL_EXEC:
	  fputs ("@ie", file);
	  break;
	/* Use global-dynamic for local-dynamic.  */
	case TLS_MODEL_GLOBAL_DYNAMIC:
	case TLS_MODEL_LOCAL_DYNAMIC:
	  putc ('\n', file);
	  (*targetm.asm_out.internal_label) (file, "LCM", labelno);
	  fputs ("\t.tc .", file);
	  RS6000_OUTPUT_BASENAME (file, name);
	  fputs ("[TC],", file);
	  output_addr_const (file, x);
	  fputs ("@m", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }
#endif

  putc ('\n', file);
}
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  /* FOR_STRING/FOR_DECIMAL hold the text that must precede the next
     quoted character / numeric byte respectively (non-NULL only when a
     transition is pending); TO_CLOSE is the text that closes the
     currently-open form.  */
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  /* Break overly long quoted strings to dodge assembler
	     truncation.  */
	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  /* Non-printable character: emit it as a decimal .byte value.  */
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
/* NOTE(review): lossy extraction -- missing source lines (gaps at
   16529-16531, 16538-16540, 16543-16546, 16553-16559, 16562+); the
   declarations of LEN and P and several closing statements are absent.
   Code text left byte-identical.  */
16512 /* Generate a unique section name for FILENAME for a section type
16513 represented by SECTION_DESC. Output goes into BUF.
16515 SECTION_DESC can be any string, as long as it is different for each
16516 possible section type.
16518 We name the section in the same manner as xlc. The name begins with an
16519 underscore followed by the filename (after stripping any leading directory
16520 names) with the last period replaced by the string SECTION_DESC. If
16521 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16525 rs6000_gen_section_name (char **buf
, const char *filename
,
16526 const char *section_desc
)
16528 const char *q
, *after_last_slash
, *last_period
= 0;
/* First pass: find the basename start (after the last slash) and remember
   the position of the last '.' so it can be replaced by SECTION_DESC.  */
16532 after_last_slash
= filename
;
16533 for (q
= filename
; *q
; q
++)
16536 after_last_slash
= q
+ 1;
16537 else if (*q
== '.')
/* +2: one for the leading underscore, one for the terminating NUL
   (presumably -- TODO confirm against the full upstream function).  */
16541 len
= strlen (after_last_slash
) + strlen (section_desc
) + 2;
16542 *buf
= (char *) xmalloc (len
);
/* Second pass: copy the basename, splicing in SECTION_DESC at the last
   period and keeping only alphanumeric characters.  */
16547 for (q
= after_last_slash
; *q
; q
++)
16549 if (q
== last_period
)
16551 strcpy (p
, section_desc
);
16552 p
+= strlen (section_desc
);
16556 else if (ISALNUM (*q
))
/* No period in FILENAME: append SECTION_DESC at the end instead.  */
16560 if (last_period
== 0)
16561 strcpy (p
, section_desc
);
/* NOTE(review): lossy extraction -- missing source lines (gaps at
   16567-16568, 16576-16577, 16586-16588, 16590-16591, etc.); the function
   header line and several braces/declarations are absent.  Code text left
   byte-identical.  */
16566 /* Emit profile function. */
16569 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
16571 /* Non-standard profiling for kernels, which just saves LR then calls
16572 _mcount without worrying about arg saves. The idea is to change
16573 the function prologue as little as possible as it isn't easy to
16574 account for arg save/restore code added just for _mcount. */
16575 if (TARGET_PROFILE_KERNEL
)
/* AIX / ELFv2 ABIs: call RS6000_MCOUNT, optionally passing the address of
   a per-call-site counter label when profile counters are enabled.  */
16578 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
16580 #ifndef NO_PROFILE_COUNTERS
16581 # define NO_PROFILE_COUNTERS 0
16583 if (NO_PROFILE_COUNTERS
)
16584 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
16585 LCT_NORMAL
, VOIDmode
);
16589 const char *label_name
;
16592 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
16593 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
16594 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
16596 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
16597 LCT_NORMAL
, VOIDmode
, fun
, Pmode
);
/* Darwin ABI: mcount receives the caller's address; for MachO PIC it is
   collected from r0 where the prologue left it.  */
16600 else if (DEFAULT_ABI
== ABI_DARWIN
)
16602 const char *mcount_name
= RS6000_MCOUNT
;
16603 int caller_addr_regno
= LR_REGNO
;
16605 /* Be conservative and always set this, at least for now. */
16606 crtl
->uses_pic_offset_table
= 1;
16609 /* For PIC code, set up a stub and collect the caller's address
16610 from r0, which is where the prologue puts it. */
16611 if (MACHOPIC_INDIRECT
16612 && crtl
->uses_pic_offset_table
)
16613 caller_addr_regno
= 0;
16615 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
16616 LCT_NORMAL
, VOIDmode
,
16617 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
/* NOTE(review): lossy extraction -- missing source lines (gaps at
   16625-16627, 16629-16630, 16632-16635, 16637-16638, 16642, 16645,
   16647, 16649-16650, 16653-16654, 16665, 16667, 16675, 16677,
   16682-16683, 16691-16693, 16698, 16703-16705, 16714-16715, 16719-16723);
   case labels, braces and local declarations (e.g. BUF, NAME) are absent.
   Code text left byte-identical.  Emits the -p/-pg profiler call sequence
   for the current ABI, switching on DEFAULT_ABI.  */
16621 /* Write function profiler code. */
16624 output_function_profiler (FILE *file
, int labelno
)
16628 switch (DEFAULT_ABI
)
16631 gcc_unreachable ();
16636 warning (0, "no profiling of 64-bit code for this ABI");
/* 32-bit SVR4/V4 path: build the "LP<labelno>" counter label, save LR,
   then materialize the counter address differently for non-PIC,
   secure-PLT PIC, -fpic, and -fPIC.  */
16639 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
16640 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
16641 if (NO_PROFILE_COUNTERS
)
16643 asm_fprintf (file
, "\tstw %s,4(%s)\n",
16644 reg_names
[0], reg_names
[1]);
16646 else if (TARGET_SECURE_PLT
&& flag_pic
)
16648 if (TARGET_LINK_STACK
)
16651 get_ppc476_thunk_name (name
);
16652 asm_fprintf (file
, "\tbl %s\n", name
);
16655 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
16656 asm_fprintf (file
, "\tstw %s,4(%s)\n",
16657 reg_names
[0], reg_names
[1]);
16658 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
16659 asm_fprintf (file
, "\taddis %s,%s,",
16660 reg_names
[12], reg_names
[12]);
16661 assemble_name (file
, buf
);
16662 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
16663 assemble_name (file
, buf
);
16664 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
16666 else if (flag_pic
== 1)
16668 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
16669 asm_fprintf (file
, "\tstw %s,4(%s)\n",
16670 reg_names
[0], reg_names
[1]);
16671 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
16672 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
16673 assemble_name (file
, buf
);
16674 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
16676 else if (flag_pic
> 1)
16678 asm_fprintf (file
, "\tstw %s,4(%s)\n",
16679 reg_names
[0], reg_names
[1]);
16680 /* Now, we need to get the address of the label. */
16681 if (TARGET_LINK_STACK
)
16684 get_ppc476_thunk_name (name
);
16685 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
16686 assemble_name (file
, buf
);
16687 fputs ("-.\n1:", file
);
16688 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
16689 asm_fprintf (file
, "\taddi %s,%s,4\n",
16690 reg_names
[11], reg_names
[11]);
16694 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
16695 assemble_name (file
, buf
);
16696 fputs ("-.\n1:", file
);
16697 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
16699 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
16700 reg_names
[0], reg_names
[11]);
16701 asm_fprintf (file
, "\tadd %s,%s,%s\n",
16702 reg_names
[0], reg_names
[0], reg_names
[11]);
/* Non-PIC: load the counter label address with lis/la via r12.  */
16706 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
16707 assemble_name (file
, buf
);
16708 fputs ("@ha\n", file
);
16709 asm_fprintf (file
, "\tstw %s,4(%s)\n",
16710 reg_names
[0], reg_names
[1]);
16711 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
16712 assemble_name (file
, buf
);
16713 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
16716 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16717 fprintf (file
, "\tbl %s%s\n",
16718 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
16724 /* Don't do anything, done in output_profile_hook (). */
/* NOTE(review): file-scope scheduler bookkeeping, reset/updated by the
   scheduling hooks below.  Text byte-identical to the extraction.  */
16731 /* The following variable value is the last issued insn. */
16733 static rtx_insn
*last_scheduled_insn
;
16735 /* The following variable helps to balance issuing of load and
16736 store instructions */
/* Positive values mean recent loads, negative mean recent stores; see
   power6_sched_reorder2.  */
16738 static int load_store_pendulum
;
16740 /* The following variable helps pair divide insns during scheduling. */
16741 static int divide_cnt
;
16742 /* The following variable helps pair and alternate vector and vector load
16743 insns during scheduling. */
16744 static int vec_pairing
;
/* NOTE(review): lossy extraction -- missing source lines (gaps at
   16752-16753, 16755, 16759, 16762-16763, 16765, 16768-16769, 16772-16773,
   16775, 16780, 16782, 16784-16785, 16787-16788, 16791-16793); the
   function header, braces and the final return are absent.  Code text left
   byte-identical.  Implements TARGET_SCHED_VARIABLE_ISSUE: given that INSN
   was just issued with MORE issue slots remaining, compute how many slots
   remain, caching the result in cached_can_issue_more.  */
16747 /* Power4 load update and store update instructions are cracked into a
16748 load or store and an integer insn which are executed in the same cycle.
16749 Branches have their own dispatch slot which does not count against the
16750 GCC issue rate, but it changes the program flow so there are no other
16751 instructions to issue in this cycle. */
16754 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
16756 last_scheduled_insn
= insn
;
/* USE/CLOBBER markers consume no issue slot.  */
16757 if (GET_CODE (PATTERN (insn
)) == USE
16758 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
16760 cached_can_issue_more
= more
;
16761 return cached_can_issue_more
;
16764 if (insn_terminates_group_p (insn
, current_group
))
16766 cached_can_issue_more
= 0;
16767 return cached_can_issue_more
;
16770 /* If no reservation, but reach here */
16771 if (recog_memoized (insn
) < 0)
/* Dispatch-group targets (Power4/5): microcoded insns end the group,
   cracked insns take two slots.  */
16774 if (rs6000_sched_groups
)
16776 if (is_microcoded_insn (insn
))
16777 cached_can_issue_more
= 0;
16778 else if (is_cracked_insn (insn
))
16779 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
16781 cached_can_issue_more
= more
- 1;
16783 return cached_can_issue_more
;
16786 if (rs6000_tune
== PROCESSOR_CELL
&& is_nonpipeline_insn (insn
))
16789 cached_can_issue_more
= more
- 1;
16790 return cached_can_issue_more
;
/* NOTE(review): lossy extraction -- the return-type line, the verbose
   guard and the final "return r;" (lines 16795, 16797, 16799-16800) are
   missing.  Thin tracing wrapper around rs6000_variable_issue_1 that logs
   the result to the sched dump STREAM.  Code text left byte-identical.  */
16794 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
16796 int r
= rs6000_variable_issue_1 (insn
, more
);
16798 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
/* NOTE(review): lossy extraction -- large gaps in the embedded line
   numbers (16804-16805, 16807-16808, 16810, 16812-16817, 16820,
   16830-16831, 16833-16836, 16842-16843, 16865, 16867-16868, 16871-16873,
   16875-16878, 16881-16890, 16894-16895, 16899-16900, 16905-16908,
   16912-16915, 16918-16926, 16928-16934, 16937-16941, 16943-16947,
   16950-16958, 16962-16963, 16968-16971, 16975-16978, 16981-16989,
   16991-16997, 17000-17004, 17006-17010, 17013-17021, 17027-17032,
   17034-17036, 17039, 17043, 17045-17048, 17052-17054, 17057-17062,
   17064-17066, 17068-17077); many case labels, braces, returns and the
   final parameter of the signature are absent.  Code text left
   byte-identical.  Implements TARGET_SCHED_ADJUST_COST: returns the
   possibly-adjusted latency of the dependence between DEP_INSN (producer)
   and INSN (consumer), switching on DEP_TYPE.  */
16802 /* Adjust the cost of a scheduling dependency. Return the new cost of
16803 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
16806 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
16809 enum attr_type attr_type
;
16811 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
16818 /* Data dependency; DEP_INSN writes a register that INSN reads
16819 some cycles later. */
16821 /* Separate a load from a narrower, dependent store. */
16822 if ((rs6000_sched_groups
|| rs6000_tune
== PROCESSOR_POWER9
16823 || rs6000_tune
== PROCESSOR_FUTURE
)
16824 && GET_CODE (PATTERN (insn
)) == SET
16825 && GET_CODE (PATTERN (dep_insn
)) == SET
16826 && MEM_P (XEXP (PATTERN (insn
), 1))
16827 && MEM_P (XEXP (PATTERN (dep_insn
), 0))
16828 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
16829 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
16832 attr_type
= get_attr_type (insn
);
16837 /* Tell the first scheduling pass about the latency between
16838 a mtctr and bctr (and mtlr and br/blr). The first
16839 scheduling pass will not know about this latency since
16840 the mtctr instruction, which has the latency associated
16841 to it, will be generated by reload. */
16844 /* Leave some extra cycles between a compare and its
16845 dependent branch, to inhibit expensive mispredicts. */
16846 if ((rs6000_tune
== PROCESSOR_PPC603
16847 || rs6000_tune
== PROCESSOR_PPC604
16848 || rs6000_tune
== PROCESSOR_PPC604e
16849 || rs6000_tune
== PROCESSOR_PPC620
16850 || rs6000_tune
== PROCESSOR_PPC630
16851 || rs6000_tune
== PROCESSOR_PPC750
16852 || rs6000_tune
== PROCESSOR_PPC7400
16853 || rs6000_tune
== PROCESSOR_PPC7450
16854 || rs6000_tune
== PROCESSOR_PPCE5500
16855 || rs6000_tune
== PROCESSOR_PPCE6500
16856 || rs6000_tune
== PROCESSOR_POWER4
16857 || rs6000_tune
== PROCESSOR_POWER5
16858 || rs6000_tune
== PROCESSOR_POWER7
16859 || rs6000_tune
== PROCESSOR_POWER8
16860 || rs6000_tune
== PROCESSOR_POWER9
16861 || rs6000_tune
== PROCESSOR_FUTURE
16862 || rs6000_tune
== PROCESSOR_CELL
)
16863 && recog_memoized (dep_insn
)
16864 && (INSN_CODE (dep_insn
) >= 0))
16866 switch (get_attr_type (dep_insn
))
16869 case TYPE_FPCOMPARE
:
16870 case TYPE_CR_LOGICAL
:
16874 if (get_attr_dot (dep_insn
) == DOT_YES
)
16879 if (get_attr_dot (dep_insn
) == DOT_YES
16880 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
/* Power6: raise the cost when a fixed-point result feeds the store
   DATA (not address) and the store-data bypass does not apply.  */
16891 if ((rs6000_tune
== PROCESSOR_POWER6
)
16892 && recog_memoized (dep_insn
)
16893 && (INSN_CODE (dep_insn
) >= 0))
16896 if (GET_CODE (PATTERN (insn
)) != SET
)
16897 /* If this happens, we have to extend this to schedule
16898 optimally. Return default for now. */
16901 /* Adjust the cost for the case where the value written
16902 by a fixed point operation is used as the address
16903 gen value on a store. */
16904 switch (get_attr_type (dep_insn
))
16909 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
16910 return get_attr_sign_extend (dep_insn
)
16911 == SIGN_EXTEND_YES
? 6 : 4;
16916 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
16917 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
16927 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
16935 if (get_attr_update (dep_insn
) == UPDATE_YES
16936 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
16942 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
16948 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
16949 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
/* Power6: same idea for a fixed-point result feeding the address
   generation of a subsequent load.  */
16959 if ((rs6000_tune
== PROCESSOR_POWER6
)
16960 && recog_memoized (dep_insn
)
16961 && (INSN_CODE (dep_insn
) >= 0))
16964 /* Adjust the cost for the case where the value written
16965 by a fixed point instruction is used within the address
16966 gen portion of a subsequent load(u)(x) */
16967 switch (get_attr_type (dep_insn
))
16972 if (set_to_load_agen (dep_insn
, insn
))
16973 return get_attr_sign_extend (dep_insn
)
16974 == SIGN_EXTEND_YES
? 6 : 4;
16979 if (set_to_load_agen (dep_insn
, insn
))
16980 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
16990 if (set_to_load_agen (dep_insn
, insn
))
16998 if (get_attr_update (dep_insn
) == UPDATE_YES
16999 && set_to_load_agen (dep_insn
, insn
))
17005 if (set_to_load_agen (dep_insn
, insn
))
17011 if (set_to_load_agen (dep_insn
, insn
))
17012 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
17022 if ((rs6000_tune
== PROCESSOR_POWER6
)
17023 && get_attr_update (insn
) == UPDATE_NO
17024 && recog_memoized (dep_insn
)
17025 && (INSN_CODE (dep_insn
) >= 0)
17026 && (get_attr_type (dep_insn
) == TYPE_MFFGPR
))
17033 /* Fall out to return default cost. */
17037 case REG_DEP_OUTPUT
:
17038 /* Output dependency; DEP_INSN writes a register that INSN writes some
17040 if ((rs6000_tune
== PROCESSOR_POWER6
)
17041 && recog_memoized (dep_insn
)
17042 && (INSN_CODE (dep_insn
) >= 0))
17044 attr_type
= get_attr_type (insn
);
17049 case TYPE_FPSIMPLE
:
17050 if (get_attr_type (dep_insn
) == TYPE_FP
17051 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
17055 if (get_attr_update (insn
) == UPDATE_NO
17056 && get_attr_type (dep_insn
) == TYPE_MFFGPR
)
17063 /* Fall through, no cost for output dependency. */
17067 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17072 gcc_unreachable ();
/* NOTE(review): lossy extraction -- missing lines (17079-17080, 17083,
   17085-17091, 17096-17098, 17101-17107) including the return type, the
   dump-file guards and the final return.  Dump-enabled wrapper around
   rs6000_adjust_cost that prints the dependence kind and the before/after
   costs.  Code text left byte-identical.  The "depencency" typos below are
   in the original runtime strings and must not be "fixed" here.  */
17078 /* Debug version of rs6000_adjust_cost. */
17081 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
17082 int cost
, unsigned int dw
)
17084 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
17092 default: dep
= "unknown depencency"; break;
17093 case REG_DEP_TRUE
: dep
= "data dependency"; break;
17094 case REG_DEP_OUTPUT
: dep
= "output dependency"; break;
17095 case REG_DEP_ANTI
: dep
= "anti depencency"; break;
17099 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17100 "%s, insn:\n", ret
, cost
, dep
);
/* NOTE(review): lossy extraction -- missing lines (17110-17111, 17113,
   17117-17118, 17121, 17124, 17133-17138) including the return type,
   braces and return statements.  Predicate: true when INSN is microcoded
   on the current tuning target (Cell via the cell_micro attribute;
   Power4/5 for update-form sign-extending loads, update+indexed
   loads/stores, and mfcr).  Code text left byte-identical.  */
17108 /* The function returns a true if INSN is microcoded.
17109 Return false otherwise. */
17112 is_microcoded_insn (rtx_insn
*insn
)
17114 if (!insn
|| !NONDEBUG_INSN_P (insn
)
17115 || GET_CODE (PATTERN (insn
)) == USE
17116 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17119 if (rs6000_tune
== PROCESSOR_CELL
)
17120 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
17122 if (rs6000_sched_groups
17123 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
17125 enum attr_type type
= get_attr_type (insn
);
17126 if ((type
== TYPE_LOAD
17127 && get_attr_update (insn
) == UPDATE_YES
17128 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
17129 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
17130 && get_attr_update (insn
) == UPDATE_YES
17131 && get_attr_indexed (insn
) == INDEXED_YES
)
17132 || type
== TYPE_MFCR
)
/* NOTE(review): lossy extraction -- missing lines (17141-17142, 17144,
   17148-17149, 17152, 17178-17183) including the return type, braces and
   returns.  Predicate: true when INSN is cracked into two internal ops on
   Power4/5 dispatch-group targets and therefore occupies two issue slots.
   Code text left byte-identical.  */
17139 /* The function returns true if INSN is cracked into 2 instructions
17140 by the processor (and therefore occupies 2 issue slots). */
17143 is_cracked_insn (rtx_insn
*insn
)
17145 if (!insn
|| !NONDEBUG_INSN_P (insn
)
17146 || GET_CODE (PATTERN (insn
)) == USE
17147 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17150 if (rs6000_sched_groups
17151 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
17153 enum attr_type type
= get_attr_type (insn
);
17154 if ((type
== TYPE_LOAD
17155 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
17156 && get_attr_update (insn
) == UPDATE_NO
)
17157 || (type
== TYPE_LOAD
17158 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
17159 && get_attr_update (insn
) == UPDATE_YES
17160 && get_attr_indexed (insn
) == INDEXED_NO
)
17161 || (type
== TYPE_STORE
17162 && get_attr_update (insn
) == UPDATE_YES
17163 && get_attr_indexed (insn
) == INDEXED_NO
)
17164 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
17165 && get_attr_update (insn
) == UPDATE_YES
)
17166 || (type
== TYPE_CR_LOGICAL
17167 && get_attr_cr_logical_3op (insn
) == CR_LOGICAL_3OP_YES
)
17168 || (type
== TYPE_EXTS
17169 && get_attr_dot (insn
) == DOT_YES
)
17170 || (type
== TYPE_SHIFT
17171 && get_attr_dot (insn
) == DOT_YES
17172 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
17173 || (type
== TYPE_MUL
17174 && get_attr_dot (insn
) == DOT_YES
)
17175 || type
== TYPE_DIV
17176 || (type
== TYPE_INSERT
17177 && get_attr_size (insn
) == SIZE_32
))
/* NOTE(review): lossy extraction -- missing lines (17186-17187, 17189,
   17193-17194, 17196, 17199-17205) including the return type, braces and
   returns.  Predicate: on dispatch-group targets, true when INSN must go
   in the dispatch group's branch slot (branch or jmpreg).  Code text left
   byte-identical.  */
17184 /* The function returns true if INSN can be issued only from
17185 the branch slot. */
17188 is_branch_slot_insn (rtx_insn
*insn
)
17190 if (!insn
|| !NONDEBUG_INSN_P (insn
)
17191 || GET_CODE (PATTERN (insn
)) == USE
17192 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17195 if (rs6000_sched_groups
)
17197 enum attr_type type
= get_attr_type (insn
);
17198 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
/* NOTE(review): lossy extraction -- missing lines (17208, 17210, 17212,
   17216-17217, 17219, 17221-17225) including the return type, the
   null-checks after each single_set, and the closing brace.  Predicate:
   true when OUT_INSN's single-set destination register is mentioned in
   IN_INSN's single-set source (i.e. feeds its address generation).  Code
   text left byte-identical.  */
17206 /* The function returns true if out_inst sets a value that is
17207 used in the address generation computation of in_insn */
17209 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
17211 rtx out_set
, in_set
;
17213 /* For performance reasons, only handle the simple case where
17214 both loads are a single_set. */
17215 out_set
= single_set (out_insn
);
17218 in_set
= single_set (in_insn
);
17220 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
/* NOTE(review): lossy extraction -- missing lines (17228-17229,
   17232-17233, 17236-17237, 17240-17242, 17246-17247, 17250, 17253,
   17255-17260) including the return type, *offset initialization, the
   else-return paths, and the final *base assignment / return.  Code text
   left byte-identical.
   Caution for future editors: "if MEM_SIZE_KNOWN_P (mem)" without outer
   parentheses is the genuine upstream GCC idiom (the macro expands to a
   parenthesized expression) -- not a typo.  */
17226 /* Try to determine base/offset/size parts of the given MEM.
17227 Return true if successful, false if all the values couldn't
17230 This function only looks for REG or REG+CONST address forms.
17231 REG+REG address form will return false. */
17234 get_memref_parts (rtx mem
, rtx
*base
, HOST_WIDE_INT
*offset
,
17235 HOST_WIDE_INT
*size
)
17238 if MEM_SIZE_KNOWN_P (mem
)
17239 *size
= MEM_SIZE (mem
);
17243 addr_rtx
= (XEXP (mem
, 0));
17244 if (GET_CODE (addr_rtx
) == PRE_MODIFY
)
17245 addr_rtx
= XEXP (addr_rtx
, 1);
/* Peel nested (PLUS base const) layers, accumulating the constant
   displacement into *OFFSET.  */
17248 while (GET_CODE (addr_rtx
) == PLUS
17249 && CONST_INT_P (XEXP (addr_rtx
, 1)))
17251 *offset
+= INTVAL (XEXP (addr_rtx
, 1));
17252 addr_rtx
= XEXP (addr_rtx
, 0);
17254 if (!REG_P (addr_rtx
))
/* NOTE(review): lossy extraction -- missing lines (17264-17265,
   17267-17268, 17270, 17276-17279) including the return type, the REG
   declarations and the fallback return.  Predicate: true when MEM1 and
   MEM2 decompose to the same base register and one location ends exactly
   where the other begins.  Code text left byte-identical.  */
17261 /* The function returns true if the target storage location of
17262 mem1 is adjacent to the target storage location of mem2 */
17263 /* Return 1 if memory locations are adjacent. */
17266 adjacent_mem_locations (rtx mem1
, rtx mem2
)
17269 HOST_WIDE_INT off1
, size1
, off2
, size2
;
17271 if (get_memref_parts (mem1
, &reg1
, &off1
, &size1
)
17272 && get_memref_parts (mem2
, &reg2
, &off2
, &size2
))
17273 return ((REGNO (reg1
) == REGNO (reg2
))
17274 && ((off1
+ size1
== off2
)
17275 || (off2
+ size2
== off1
)));
/* NOTE(review): lossy extraction -- missing lines (17282-17283,
   17285-17286, 17288, 17294-17297) including the return type, the REG
   declarations and the fallback return.  Predicate: true when MEM1 and
   MEM2 share a base register and their [offset, offset+size) intervals
   intersect by at least one byte.  Code text left byte-identical.  */
17280 /* This function returns true if it can be determined that the two MEM
17281 locations overlap by at least 1 byte based on base reg/offset/size. */
17284 mem_locations_overlap (rtx mem1
, rtx mem2
)
17287 HOST_WIDE_INT off1
, size1
, off2
, size2
;
17289 if (get_memref_parts (mem1
, &reg1
, &off1
, &size1
)
17290 && get_memref_parts (mem2
, &reg2
, &off2
, &size2
))
17291 return ((REGNO (reg1
) == REGNO (reg2
))
17292 && (((off1
<= off2
) && (off1
+ size1
> off2
))
17293 || ((off2
<= off1
) && (off2
+ size2
> off1
))));
/* NOTE(review): lossy extraction -- missing lines (17303-17304, 17306,
   17312-17313, 17315-17316, 17318-17319, 17323-17328, 17332-17337,
   17342-17343, 17345, 17356-17357, 17364, 17366-17369) including the
   return type, switch cases, dump-file guards and default returns.
   Implements TARGET_SCHED_ADJUST_PRIORITY for INSN's current PRIORITY.
   Code text left byte-identical.  */
17298 /* A C statement (sans semicolon) to update the integer scheduling
17299 priority INSN_PRIORITY (INSN). Increase the priority to execute the
17300 INSN earlier, reduce the priority to execute INSN later. Do not
17301 define this macro if you do not need to adjust the scheduling
17302 priorities of insns. */
17305 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
17307 rtx load_mem
, str_mem
;
17308 /* On machines (like the 750) which have asymmetric integer units,
17309 where one integer unit can do multiply and divides and the other
17310 can't, reduce the priority of multiply/divide so it is scheduled
17311 before other integer operations. */
17314 if (! INSN_P (insn
))
17317 if (GET_CODE (PATTERN (insn
)) == USE
)
17320 switch (rs6000_tune
) {
17321 case PROCESSOR_PPC750
:
17322 switch (get_attr_type (insn
))
17329 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
17330 priority
, priority
);
17331 if (priority
>= 0 && priority
< 0x01000000)
/* Dispatch-group targets: optionally boost insns that must lead a
   dispatch group, per -msched-restricted-insns-priority.  */
17338 if (insn_must_be_first_in_group (insn
)
17339 && reload_completed
17340 && current_sched_info
->sched_max_insns_priority
17341 && rs6000_sched_restricted_insns_priority
)
17344 /* Prioritize insns that can be dispatched only in the first
17346 if (rs6000_sched_restricted_insns_priority
== 1)
17347 /* Attach highest priority to insn. This means that in
17348 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17349 precede 'priority' (critical path) considerations. */
17350 return current_sched_info
->sched_max_insns_priority
;
17351 else if (rs6000_sched_restricted_insns_priority
== 2)
17352 /* Increase priority of insn by a minimal amount. This means that in
17353 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17354 considerations precede dispatch-slot restriction considerations. */
17355 return (priority
+ 1);
17358 if (rs6000_tune
== PROCESSOR_POWER6
17359 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
17360 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
17361 /* Attach highest priority to insn if the scheduler has just issued two
17362 stores and this instruction is a load, or two loads and this instruction
17363 is a store. Power6 wants loads and stores scheduled alternately
17365 return current_sched_info
->sched_max_insns_priority
;
/* NOTE(review): lossy extraction -- missing lines (17371, 17373,
   17378-17379, 17390-17396) including the return type, braces and the
   returns.  Predicate: true for insn types that are not pipelined on the
   Cell processor (multiplies, divides, square roots, CR/JMPR moves).
   Code text left byte-identical.  */
17370 /* Return true if the instruction is nonpipelined on the Cell. */
17372 is_nonpipeline_insn (rtx_insn
*insn
)
17374 enum attr_type type
;
17375 if (!insn
|| !NONDEBUG_INSN_P (insn
)
17376 || GET_CODE (PATTERN (insn
)) == USE
17377 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17380 type
= get_attr_type (insn
);
17381 if (type
== TYPE_MUL
17382 || type
== TYPE_DIV
17383 || type
== TYPE_SDIV
17384 || type
== TYPE_DDIV
17385 || type
== TYPE_SSQRT
17386 || type
== TYPE_DSQRT
17387 || type
== TYPE_MFCR
17388 || type
== TYPE_MFCRF
17389 || type
== TYPE_MFJMPR
)
/* NOTE(review): lossy extraction -- missing lines (17398-17399, 17401,
   17405-17406, 17411, 17426, 17432, 17437, 17439, 17442-17447) including
   the return type, the actual "return N;" statement for each group of
   cases, and the default.  Implements TARGET_SCHED_ISSUE_RATE: issue
   width per cycle, keyed on rs6000_tune; each run of case labels shares
   one (missing) return.  Code text left byte-identical.  */
17397 /* Return how many instructions the machine can issue per cycle. */
17400 rs6000_issue_rate (void)
17402 /* Unless scheduling for register pressure, use issue rate of 1 for
17403 first scheduling pass to decrease degradation. */
17404 if (!reload_completed
&& !flag_sched_pressure
)
17407 switch (rs6000_tune
) {
17408 case PROCESSOR_RS64A
:
17409 case PROCESSOR_PPC601
: /* ? */
17410 case PROCESSOR_PPC7450
:
17412 case PROCESSOR_PPC440
:
17413 case PROCESSOR_PPC603
:
17414 case PROCESSOR_PPC750
:
17415 case PROCESSOR_PPC7400
:
17416 case PROCESSOR_PPC8540
:
17417 case PROCESSOR_PPC8548
:
17418 case PROCESSOR_CELL
:
17419 case PROCESSOR_PPCE300C2
:
17420 case PROCESSOR_PPCE300C3
:
17421 case PROCESSOR_PPCE500MC
:
17422 case PROCESSOR_PPCE500MC64
:
17423 case PROCESSOR_PPCE5500
:
17424 case PROCESSOR_PPCE6500
:
17425 case PROCESSOR_TITAN
:
17427 case PROCESSOR_PPC476
:
17428 case PROCESSOR_PPC604
:
17429 case PROCESSOR_PPC604e
:
17430 case PROCESSOR_PPC620
:
17431 case PROCESSOR_PPC630
:
17433 case PROCESSOR_POWER4
:
17434 case PROCESSOR_POWER5
:
17435 case PROCESSOR_POWER6
:
17436 case PROCESSOR_POWER7
:
17438 case PROCESSOR_POWER8
:
17440 case PROCESSOR_POWER9
:
17441 case PROCESSOR_FUTURE
:
/* NOTE(review): lossy extraction -- missing lines (17449-17451, 17453,
   17455, 17458-17459, 17462-17467) including the return type and the
   return values for the 8540/8548 and default cases.  Implements
   TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD.  Code text left
   byte-identical.  */
17448 /* Return how many instructions to look ahead for better insn
17452 rs6000_use_sched_lookahead (void)
17454 switch (rs6000_tune
)
17456 case PROCESSOR_PPC8540
:
17457 case PROCESSOR_PPC8548
:
17460 case PROCESSOR_CELL
:
/* Cell: only look ahead after reload; 0 disables lookahead in sched1.  */
17461 return (reload_completed
? 8 : 0);
/* NOTE(review): lossy extraction -- missing lines (17469-17470, 17472,
   17474-17475, 17477-17478, 17480, 17484-17488) including the return type
   and return statements.  Guard hook: on Cell, reject non-pipelined or
   microcoded candidates beyond ready-list position 0.  Code text left
   byte-identical.  */
17468 /* We are choosing insn from the ready queue. Return zero if INSN can be
17471 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
17473 if (ready_index
== 0)
17476 if (rs6000_tune
!= PROCESSOR_CELL
)
17479 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
17481 if (!reload_completed
17482 || is_nonpipeline_insn (insn
)
17483 || is_microcoded_insn (insn
))
/* NOTE(review): lossy extraction -- missing lines (17491-17492,
   17494-17497, 17500-17507, 17510, 17512-17514, 17516-17517, 17520,
   17522-17528) including the return type, the MEM_P base case that sets
   *MEM_REF, and the returns.  Recursive search over PAT's rtx structure
   for a MEM subexpression; stack_tie patterns are excluded up front.
   Code text left byte-identical.  */
17489 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17490 and return true. */
17493 find_mem_ref (rtx pat
, rtx
*mem_ref
)
17498 /* stack_tie does not produce any real memory traffic. */
17499 if (tie_operand (pat
, VOIDmode
))
17508 /* Recursively process the pattern. */
17509 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
17511 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
17515 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
17518 else if (fmt
[i
] == 'E')
17519 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
17521 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
/* NOTE(review): lossy extraction -- missing lines (17530-17531, 17533,
   17535-17536, 17539, 17541-17543, 17546-17551) including the return
   type, braces and returns.  Helper: does PAT (a SET or PARALLEL of
   SETs) read memory?  On success *LOAD_MEM receives the MEM rtx.  Code
   text left byte-identical.  */
17529 /* Determine if PAT is a PATTERN of a load insn. */
17532 is_load_insn1 (rtx pat
, rtx
*load_mem
)
17534 if (!pat
|| pat
== NULL_RTX
)
17537 if (GET_CODE (pat
) == SET
)
17538 return find_mem_ref (SET_SRC (pat
), load_mem
);
17540 if (GET_CODE (pat
) == PARALLEL
)
17544 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
17545 if (is_load_insn1 (XVECEXP (pat
, 0, i
), load_mem
))
/* NOTE(review): lossy extraction -- missing lines (17553-17554, 17556,
   17558-17562) including the return type and the early-return body.
   Wrapper: guards against null/non-insn INSN, then defers to
   is_load_insn1 on its PATTERN.  Code text left byte-identical.  */
17552 /* Determine if INSN loads from memory. */
17555 is_load_insn (rtx insn
, rtx
*load_mem
)
17557 if (!insn
|| !INSN_P (insn
))
17563 return is_load_insn1 (PATTERN (insn
), load_mem
);
/* NOTE(review): lossy extraction -- missing lines (17567-17568, 17570,
   17572-17573, 17576, 17578-17580, 17583-17588) including the return
   type, braces and returns.  Mirror of is_load_insn1 for the SET_DEST
   side: does PAT write memory?  On success *STR_MEM receives the MEM
   rtx.  Code text left byte-identical.  */
17566 /* Determine if PAT is a PATTERN of a store insn. */
17569 is_store_insn1 (rtx pat
, rtx
*str_mem
)
17571 if (!pat
|| pat
== NULL_RTX
)
17574 if (GET_CODE (pat
) == SET
)
17575 return find_mem_ref (SET_DEST (pat
), str_mem
);
17577 if (GET_CODE (pat
) == PARALLEL
)
17581 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
17582 if (is_store_insn1 (XVECEXP (pat
, 0, i
), str_mem
))
/* NOTE(review): lossy extraction -- missing lines (17590-17591, 17593,
   17595-17596) including the return type and the early-return body.
   Wrapper: guards against null/non-insn INSN, then defers to
   is_store_insn1 on its PATTERN.  Code text left byte-identical.  */
17589 /* Determine if INSN stores to memory. */
17592 is_store_insn (rtx insn
, rtx
*str_mem
)
17594 if (!insn
|| !INSN_P (insn
))
17597 return is_store_insn1 (PATTERN (insn
), str_mem
);
/* NOTE(review): lossy extraction -- missing lines (17601-17602,
   17604-17606, 17609-17611, 17613, 17615-17621) including the return
   type, additional case labels and the returns; only a subset of the
   pairable vector attr types is visible.  Code text left
   byte-identical.  */
17600 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17603 is_power9_pairable_vec_type (enum attr_type type
)
17607 case TYPE_VECSIMPLE
:
17608 case TYPE_VECCOMPLEX
:
17612 case TYPE_VECFLOAT
:
17614 case TYPE_VECDOUBLE
:
/* NOTE(review): lossy extraction -- missing lines (17624-17625,
   17627-17629, 17631, 17636-17637, 17642-17643, 17646, 17651-17652,
   17659-17661, 17666-17670) including the return type, the INSN/NEXT
   declarations and the true/false returns.  Implements
   TARGET_SCHED_IS_COSTLY_DEPENDENCE, honouring the
   -msched-costly-dep= policy (rs6000_sched_costly_dep).  Code text left
   byte-identical.  */
17622 /* Returns whether the dependence between INSN and NEXT is considered
17623 costly by the given target. */
17626 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
17630 rtx load_mem
, str_mem
;
17632 /* If the flag is not enabled - no dependence is considered costly;
17633 allow all dependent insns in the same group.
17634 This is the most aggressive option. */
17635 if (rs6000_sched_costly_dep
== no_dep_costly
)
17638 /* If the flag is set to 1 - a dependence is always considered costly;
17639 do not allow dependent instructions in the same group.
17640 This is the most conservative option. */
17641 if (rs6000_sched_costly_dep
== all_deps_costly
)
17644 insn
= DEP_PRO (dep
);
17645 next
= DEP_CON (dep
);
17647 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
17648 && is_load_insn (next
, &load_mem
)
17649 && is_store_insn (insn
, &str_mem
))
17650 /* Prevent load after store in the same group. */
17653 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
17654 && is_load_insn (next
, &load_mem
)
17655 && is_store_insn (insn
, &str_mem
)
17656 && DEP_TYPE (dep
) == REG_DEP_TRUE
17657 && mem_locations_overlap(str_mem
, load_mem
))
17658 /* Prevent load after store in the same group if it is a true
17662 /* The flag is set to X; dependences with latency >= X are considered costly,
17663 and will not be scheduled in the same group. */
17664 if (rs6000_sched_costly_dep
<= max_dep_latency
17665 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
/* NOTE(review): lossy extraction -- missing lines (17674-17675, 17677,
   17679-17682, 17685-17687, 17693-17697) including the return type, the
   enclosing loop construct around the NEXT_INSN advance, part of the
   active-insn condition (the LABEL_P/CALL_P alternatives, presumably --
   TODO confirm upstream) and the final return.  Skips "non-active" insns
   (notes, USE/CLOBBER, stack_tie) between INSN and TAIL.  Code text left
   byte-identical.  */
17671 /* Return the next insn after INSN that is found before TAIL is reached,
17672 skipping any "non-active" insns - insns that will not actually occupy
17673 an issue slot. Return NULL_RTX if such an insn is not found. */
17676 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
17678 if (insn
== NULL_RTX
|| insn
== tail
)
17683 insn
= NEXT_INSN (insn
);
17684 if (insn
== NULL_RTX
|| insn
== tail
)
17688 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
17689 || (NONJUMP_INSN_P (insn
)
17690 && GET_CODE (PATTERN (insn
)) != USE
17691 && GET_CODE (PATTERN (insn
)) != CLOBBER
17692 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
/* NOTE(review): lossy extraction -- missing lines (17699-17700,
   17702-17706, 17710-17711) including the return type and the
   declarations/initialization of TMP and I ("tmp = ready[pos];" is
   absent).  Rotates READY[POS..LASTPOS] left by one so the element at
   POS ends up at LASTPOS -- note the scheduler treats the END of the
   ready list as highest priority.  Code text left byte-identical.  */
17698 /* Move instruction at POS to the end of the READY list. */
17701 move_to_end_of_ready (rtx_insn
**ready
, int pos
, int lastpos
)
17707 for (i
= pos
; i
< lastpos
; i
++)
17708 ready
[i
] = ready
[i
+ 1];
17709 ready
[lastpos
] = tmp
;
/* NOTE(review): lossy extraction -- missing lines throughout (17713-17714,
   17716, 17719, 17723, 17726, 17729, 17732-17733, 17737-17738, 17743,
   17747-17748, 17750, 17756-17757, 17759, 17766, 17768, 17773, 17775,
   17778-17781, 17783, 17786, 17788, 17792-17796, 17798, 17802-17805,
   17807, 17809, 17811, 17816-17821, 17823, 17826, 17828-17832, 17834-17835,
   17837, 17840, 17843, 17848, 17852, 17854-17860, 17862, 17866, 17871-17872,
   17874, 17877-17881, 17883, 17885, 17887, 17892-17898, 17900-17901);
   scan-loop headers, POS declarations, braces and early returns are
   absent.  The large header comment below documents the pendulum
   state machine; the code left byte-identical.  */
17712 /* Do Power6 specific sched_reorder2 reordering of ready list. */
17715 power6_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
17717 /* For Power6, we need to handle some special cases to try and keep the
17718 store queue from overflowing and triggering expensive flushes.
17720 This code monitors how load and store instructions are being issued
17721 and skews the ready list one way or the other to increase the likelihood
17722 that a desired instruction is issued at the proper time.
17724 A couple of things are done. First, we maintain a "load_store_pendulum"
17725 to track the current state of load/store issue.
17727 - If the pendulum is at zero, then no loads or stores have been
17728 issued in the current cycle so we do nothing.
17730 - If the pendulum is 1, then a single load has been issued in this
17731 cycle and we attempt to locate another load in the ready list to
17734 - If the pendulum is -2, then two stores have already been
17735 issued in this cycle, so we increase the priority of the first load
17736 in the ready list to increase it's likelihood of being chosen first
17739 - If the pendulum is -1, then a single store has been issued in this
17740 cycle and we attempt to locate another store in the ready list to
17741 issue with it, preferring a store to an adjacent memory location to
17742 facilitate store pairing in the store queue.
17744 - If the pendulum is 2, then two loads have already been
17745 issued in this cycle, so we increase the priority of the first store
17746 in the ready list to increase it's likelihood of being chosen first
17749 - If the pendulum < -2 or > 2, then do nothing.
17751 Note: This code covers the most common scenarios. There exist non
17752 load/store instructions which make use of the LSU and which
17753 would need to be accounted for to strictly model the behavior
17754 of the machine. Those instructions are currently unaccounted
17755 for to help minimize compile time overhead of this code.
17758 rtx load_mem
, str_mem
;
/* Update the pendulum from the insn just issued: stores swing left
   (negative), loads swing right (positive).  */
17760 if (is_store_insn (last_scheduled_insn
, &str_mem
))
17761 /* Issuing a store, swing the load_store_pendulum to the left */
17762 load_store_pendulum
--;
17763 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
17764 /* Issuing a load, swing the load_store_pendulum to the right */
17765 load_store_pendulum
++;
17767 return cached_can_issue_more
;
17769 /* If the pendulum is balanced, or there is only one instruction on
17770 the ready list, then all is well, so return. */
17771 if ((load_store_pendulum
== 0) || (lastpos
<= 0))
17772 return cached_can_issue_more
;
17774 if (load_store_pendulum
== 1)
17776 /* A load has been issued in this cycle. Scan the ready list
17777 for another load to issue with it */
17782 if (is_load_insn (ready
[pos
], &load_mem
))
17784 /* Found a load. Move it to the head of the ready list,
17785 and adjust it's priority so that it is more likely to
17787 move_to_end_of_ready (ready
, pos
, lastpos
);
17789 if (!sel_sched_p ()
17790 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
17791 INSN_PRIORITY (ready
[lastpos
])++;
17797 else if (load_store_pendulum
== -2)
17799 /* Two stores have been issued in this cycle. Increase the
17800 priority of the first load in the ready list to favor it for
17801 issuing in the next cycle. */
17806 if (is_load_insn (ready
[pos
], &load_mem
)
17808 && INSN_PRIORITY_KNOWN (ready
[pos
]))
17810 INSN_PRIORITY (ready
[pos
])++;
17812 /* Adjust the pendulum to account for the fact that a load
17813 was found and increased in priority. This is to prevent
17814 increasing the priority of multiple loads */
17815 load_store_pendulum
--;
17822 else if (load_store_pendulum
== -1)
17824 /* A store has been issued in this cycle. Scan the ready list for
17825 another store to issue with it, preferring a store to an adjacent
17827 int first_store_pos
= -1;
17833 if (is_store_insn (ready
[pos
], &str_mem
))
17836 /* Maintain the index of the first store found on the
17838 if (first_store_pos
== -1)
17839 first_store_pos
= pos
;
17841 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
17842 && adjacent_mem_locations (str_mem
, str_mem2
))
17844 /* Found an adjacent store. Move it to the head of the
17845 ready list, and adjust it's priority so that it is
17846 more likely to stay there */
17847 move_to_end_of_ready (ready
, pos
, lastpos
);
17849 if (!sel_sched_p ()
17850 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
17851 INSN_PRIORITY (ready
[lastpos
])++;
17853 first_store_pos
= -1;
17861 if (first_store_pos
>= 0)
17863 /* An adjacent store wasn't found, but a non-adjacent store was,
17864 so move the non-adjacent store to the front of the ready
17865 list, and adjust its priority so that it is more likely to
17867 move_to_end_of_ready (ready
, first_store_pos
, lastpos
);
17868 if (!sel_sched_p ()
17869 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
17870 INSN_PRIORITY (ready
[lastpos
])++;
17873 else if (load_store_pendulum
== 2)
17875 /* Two loads have been issued in this cycle. Increase the priority
17876 of the first store in the ready list to favor it for issuing in
17882 if (is_store_insn (ready
[pos
], &str_mem
)
17884 && INSN_PRIORITY_KNOWN (ready
[pos
]))
17886 INSN_PRIORITY (ready
[pos
])++;
17888 /* Adjust the pendulum to account for the fact that a store
17889 was found and increased in priority. This is to prevent
17890 increasing the priority of multiple stores */
17891 load_store_pendulum
++;
17899 return cached_can_issue_more
;
17902 /* Do Power9 specific sched_reorder2 reordering of ready list. */
17905 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
17908 enum attr_type type
, type2
;
17910 type
= get_attr_type (last_scheduled_insn
);
17912 /* Try to issue fixed point divides back-to-back in pairs so they will be
17913 routed to separate execution units and execute in parallel. */
17914 if (type
== TYPE_DIV
&& divide_cnt
== 0)
17916 /* First divide has been scheduled. */
17919 /* Scan the ready list looking for another divide, if found move it
17920 to the end of the list so it is chosen next. */
17924 if (recog_memoized (ready
[pos
]) >= 0
17925 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
17927 move_to_end_of_ready (ready
, pos
, lastpos
);
17935 /* Last insn was the 2nd divide or not a divide, reset the counter. */
17938 /* The best dispatch throughput for vector and vector load insns can be
17939 achieved by interleaving a vector and vector load such that they'll
17940 dispatch to the same superslice. If this pairing cannot be achieved
17941 then it is best to pair vector insns together and vector load insns
17944 To aid in this pairing, vec_pairing maintains the current state with
17945 the following values:
17947 0 : Initial state, no vecload/vector pairing has been started.
17949 1 : A vecload or vector insn has been issued and a candidate for
17950 pairing has been found and moved to the end of the ready
17952 if (type
== TYPE_VECLOAD
)
17954 /* Issued a vecload. */
17955 if (vec_pairing
== 0)
17957 int vecload_pos
= -1;
17958 /* We issued a single vecload, look for a vector insn to pair it
17959 with. If one isn't found, try to pair another vecload. */
17963 if (recog_memoized (ready
[pos
]) >= 0)
17965 type2
= get_attr_type (ready
[pos
]);
17966 if (is_power9_pairable_vec_type (type2
))
17968 /* Found a vector insn to pair with, move it to the
17969 end of the ready list so it is scheduled next. */
17970 move_to_end_of_ready (ready
, pos
, lastpos
);
17972 return cached_can_issue_more
;
17974 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
17975 /* Remember position of first vecload seen. */
17980 if (vecload_pos
>= 0)
17982 /* Didn't find a vector to pair with but did find a vecload,
17983 move it to the end of the ready list. */
17984 move_to_end_of_ready (ready
, vecload_pos
, lastpos
);
17986 return cached_can_issue_more
;
17990 else if (is_power9_pairable_vec_type (type
))
17992 /* Issued a vector operation. */
17993 if (vec_pairing
== 0)
17996 /* We issued a single vector insn, look for a vecload to pair it
17997 with. If one isn't found, try to pair another vector. */
18001 if (recog_memoized (ready
[pos
]) >= 0)
18003 type2
= get_attr_type (ready
[pos
]);
18004 if (type2
== TYPE_VECLOAD
)
18006 /* Found a vecload insn to pair with, move it to the
18007 end of the ready list so it is scheduled next. */
18008 move_to_end_of_ready (ready
, pos
, lastpos
);
18010 return cached_can_issue_more
;
18012 else if (is_power9_pairable_vec_type (type2
)
18014 /* Remember position of first vector insn seen. */
18021 /* Didn't find a vecload to pair with but did find a vector
18022 insn, move it to the end of the ready list. */
18023 move_to_end_of_ready (ready
, vec_pos
, lastpos
);
18025 return cached_can_issue_more
;
18030 /* We've either finished a vec/vecload pair, couldn't find an insn to
18031 continue the current pair, or the last insn had nothing to do with
18032 with pairing. In any case, reset the state. */
18036 return cached_can_issue_more
;
18039 /* We are about to begin issuing insns for this clock cycle. */
18042 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED
, int sched_verbose
,
18043 rtx_insn
**ready ATTRIBUTE_UNUSED
,
18044 int *pn_ready ATTRIBUTE_UNUSED
,
18045 int clock_var ATTRIBUTE_UNUSED
)
18047 int n_ready
= *pn_ready
;
18050 fprintf (dump
, "// rs6000_sched_reorder :\n");
18052 /* Reorder the ready list, if the second to last ready insn
18053 is a nonepipeline insn. */
18054 if (rs6000_tune
== PROCESSOR_CELL
&& n_ready
> 1)
18056 if (is_nonpipeline_insn (ready
[n_ready
- 1])
18057 && (recog_memoized (ready
[n_ready
- 2]) > 0))
18058 /* Simply swap first two insns. */
18059 std::swap (ready
[n_ready
- 1], ready
[n_ready
- 2]);
18062 if (rs6000_tune
== PROCESSOR_POWER6
)
18063 load_store_pendulum
= 0;
18065 return rs6000_issue_rate ();
18068 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18071 rs6000_sched_reorder2 (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
18072 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
)
18075 fprintf (dump
, "// rs6000_sched_reorder2 :\n");
18077 /* Do Power6 dependent reordering if necessary. */
18078 if (rs6000_tune
== PROCESSOR_POWER6
&& last_scheduled_insn
)
18079 return power6_sched_reorder2 (ready
, *pn_ready
- 1);
18081 /* Do Power9 dependent reordering if necessary. */
18082 if (rs6000_tune
== PROCESSOR_POWER9
&& last_scheduled_insn
18083 && recog_memoized (last_scheduled_insn
) >= 0)
18084 return power9_sched_reorder2 (ready
, *pn_ready
- 1);
18086 return cached_can_issue_more
;
18089 /* Return whether the presence of INSN causes a dispatch group termination
18090 of group WHICH_GROUP.
18092 If WHICH_GROUP == current_group, this function will return true if INSN
18093 causes the termination of the current group (i.e, the dispatch group to
18094 which INSN belongs). This means that INSN will be the last insn in the
18095 group it belongs to.
18097 If WHICH_GROUP == previous_group, this function will return true if INSN
18098 causes the termination of the previous group (i.e, the dispatch group that
18099 precedes the group to which INSN belongs). This means that INSN will be
18100 the first insn in the group it belongs to). */
18103 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
18110 first
= insn_must_be_first_in_group (insn
);
18111 last
= insn_must_be_last_in_group (insn
);
18116 if (which_group
== current_group
)
18118 else if (which_group
== previous_group
)
18126 insn_must_be_first_in_group (rtx_insn
*insn
)
18128 enum attr_type type
;
18132 || DEBUG_INSN_P (insn
)
18133 || GET_CODE (PATTERN (insn
)) == USE
18134 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18137 switch (rs6000_tune
)
18139 case PROCESSOR_POWER5
:
18140 if (is_cracked_insn (insn
))
18143 case PROCESSOR_POWER4
:
18144 if (is_microcoded_insn (insn
))
18147 if (!rs6000_sched_groups
)
18150 type
= get_attr_type (insn
);
18157 case TYPE_CR_LOGICAL
:
18170 case PROCESSOR_POWER6
:
18171 type
= get_attr_type (insn
);
18180 case TYPE_FPCOMPARE
:
18191 if (get_attr_dot (insn
) == DOT_NO
18192 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
18197 if (get_attr_size (insn
) == SIZE_32
)
18205 if (get_attr_update (insn
) == UPDATE_YES
)
18213 case PROCESSOR_POWER7
:
18214 type
= get_attr_type (insn
);
18218 case TYPE_CR_LOGICAL
:
18232 if (get_attr_dot (insn
) == DOT_YES
)
18237 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18238 || get_attr_update (insn
) == UPDATE_YES
)
18245 if (get_attr_update (insn
) == UPDATE_YES
)
18253 case PROCESSOR_POWER8
:
18254 type
= get_attr_type (insn
);
18258 case TYPE_CR_LOGICAL
:
18266 case TYPE_VECSTORE
:
18273 if (get_attr_dot (insn
) == DOT_YES
)
18278 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18279 || get_attr_update (insn
) == UPDATE_YES
)
18284 if (get_attr_update (insn
) == UPDATE_YES
18285 && get_attr_indexed (insn
) == INDEXED_YES
)
18301 insn_must_be_last_in_group (rtx_insn
*insn
)
18303 enum attr_type type
;
18307 || DEBUG_INSN_P (insn
)
18308 || GET_CODE (PATTERN (insn
)) == USE
18309 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18312 switch (rs6000_tune
) {
18313 case PROCESSOR_POWER4
:
18314 case PROCESSOR_POWER5
:
18315 if (is_microcoded_insn (insn
))
18318 if (is_branch_slot_insn (insn
))
18322 case PROCESSOR_POWER6
:
18323 type
= get_attr_type (insn
);
18331 case TYPE_FPCOMPARE
:
18342 if (get_attr_dot (insn
) == DOT_NO
18343 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
18348 if (get_attr_size (insn
) == SIZE_32
)
18356 case PROCESSOR_POWER7
:
18357 type
= get_attr_type (insn
);
18367 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18368 && get_attr_update (insn
) == UPDATE_YES
)
18373 if (get_attr_update (insn
) == UPDATE_YES
18374 && get_attr_indexed (insn
) == INDEXED_YES
)
18382 case PROCESSOR_POWER8
:
18383 type
= get_attr_type (insn
);
18395 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18396 && get_attr_update (insn
) == UPDATE_YES
)
18401 if (get_attr_update (insn
) == UPDATE_YES
18402 && get_attr_indexed (insn
) == INDEXED_YES
)
18417 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18418 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18421 is_costly_group (rtx
*group_insns
, rtx next_insn
)
18424 int issue_rate
= rs6000_issue_rate ();
18426 for (i
= 0; i
< issue_rate
; i
++)
18428 sd_iterator_def sd_it
;
18430 rtx insn
= group_insns
[i
];
18435 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
18437 rtx next
= DEP_CON (dep
);
18439 if (next
== next_insn
18440 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
18448 /* Utility of the function redefine_groups.
18449 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18450 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18451 to keep it "far" (in a separate group) from GROUP_INSNS, following
18452 one of the following schemes, depending on the value of the flag
18453 -minsert_sched_nops = X:
18454 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18455 in order to force NEXT_INSN into a separate group.
18456 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18457 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18458 insertion (has a group just ended, how many vacant issue slots remain in the
18459 last group, and how many dispatch groups were encountered so far). */
18462 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
18463 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
18468 int issue_rate
= rs6000_issue_rate ();
18469 bool end
= *group_end
;
18472 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
18473 return can_issue_more
;
18475 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
18476 return can_issue_more
;
18478 force
= is_costly_group (group_insns
, next_insn
);
18480 return can_issue_more
;
18482 if (sched_verbose
> 6)
18483 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
18484 *group_count
,can_issue_more
);
18486 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
18489 can_issue_more
= 0;
18491 /* Since only a branch can be issued in the last issue_slot, it is
18492 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18493 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18494 in this case the last nop will start a new group and the branch
18495 will be forced to the new group. */
18496 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
18499 /* Do we have a special group ending nop? */
18500 if (rs6000_tune
== PROCESSOR_POWER6
|| rs6000_tune
== PROCESSOR_POWER7
18501 || rs6000_tune
== PROCESSOR_POWER8
)
18503 nop
= gen_group_ending_nop ();
18504 emit_insn_before (nop
, next_insn
);
18505 can_issue_more
= 0;
18508 while (can_issue_more
> 0)
18511 emit_insn_before (nop
, next_insn
);
18519 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
18521 int n_nops
= rs6000_sched_insert_nops
;
18523 /* Nops can't be issued from the branch slot, so the effective
18524 issue_rate for nops is 'issue_rate - 1'. */
18525 if (can_issue_more
== 0)
18526 can_issue_more
= issue_rate
;
18528 if (can_issue_more
== 0)
18530 can_issue_more
= issue_rate
- 1;
18533 for (i
= 0; i
< issue_rate
; i
++)
18535 group_insns
[i
] = 0;
18542 emit_insn_before (nop
, next_insn
);
18543 if (can_issue_more
== issue_rate
- 1) /* new group begins */
18546 if (can_issue_more
== 0)
18548 can_issue_more
= issue_rate
- 1;
18551 for (i
= 0; i
< issue_rate
; i
++)
18553 group_insns
[i
] = 0;
18559 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
18562 /* Is next_insn going to start a new group? */
18565 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
18566 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
18567 || (can_issue_more
< issue_rate
&&
18568 insn_terminates_group_p (next_insn
, previous_group
)));
18569 if (*group_end
&& end
)
18572 if (sched_verbose
> 6)
18573 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
18574 *group_count
, can_issue_more
);
18575 return can_issue_more
;
18578 return can_issue_more
;
18581 /* This function tries to synch the dispatch groups that the compiler "sees"
18582 with the dispatch groups that the processor dispatcher is expected to
18583 form in practice. It tries to achieve this synchronization by forcing the
18584 estimated processor grouping on the compiler (as opposed to the function
18585 'pad_goups' which tries to force the scheduler's grouping on the processor).
18587 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
18588 examines the (estimated) dispatch groups that will be formed by the processor
18589 dispatcher. It marks these group boundaries to reflect the estimated
18590 processor grouping, overriding the grouping that the scheduler had marked.
18591 Depending on the value of the flag '-minsert-sched-nops' this function can
18592 force certain insns into separate groups or force a certain distance between
18593 them by inserting nops, for example, if there exists a "costly dependence"
18596 The function estimates the group boundaries that the processor will form as
18597 follows: It keeps track of how many vacant issue slots are available after
18598 each insn. A subsequent insn will start a new group if one of the following
18600 - no more vacant issue slots remain in the current dispatch group.
18601 - only the last issue slot, which is the branch slot, is vacant, but the next
18602 insn is not a branch.
18603 - only the last 2 or less issue slots, including the branch slot, are vacant,
18604 which means that a cracked insn (which occupies two issue slots) can't be
18605 issued in this group.
18606 - less than 'issue_rate' slots are vacant, and the next insn always needs to
18607 start a new group. */
18610 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
18613 rtx_insn
*insn
, *next_insn
;
18615 int can_issue_more
;
18618 int group_count
= 0;
18622 issue_rate
= rs6000_issue_rate ();
18623 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
18624 for (i
= 0; i
< issue_rate
; i
++)
18626 group_insns
[i
] = 0;
18628 can_issue_more
= issue_rate
;
18630 insn
= get_next_active_insn (prev_head_insn
, tail
);
18633 while (insn
!= NULL_RTX
)
18635 slot
= (issue_rate
- can_issue_more
);
18636 group_insns
[slot
] = insn
;
18638 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
18639 if (insn_terminates_group_p (insn
, current_group
))
18640 can_issue_more
= 0;
18642 next_insn
= get_next_active_insn (insn
, tail
);
18643 if (next_insn
== NULL_RTX
)
18644 return group_count
+ 1;
18646 /* Is next_insn going to start a new group? */
18648 = (can_issue_more
== 0
18649 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
18650 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
18651 || (can_issue_more
< issue_rate
&&
18652 insn_terminates_group_p (next_insn
, previous_group
)));
18654 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
18655 next_insn
, &group_end
, can_issue_more
,
18661 can_issue_more
= 0;
18662 for (i
= 0; i
< issue_rate
; i
++)
18664 group_insns
[i
] = 0;
18668 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
18669 PUT_MODE (next_insn
, VOIDmode
);
18670 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
18671 PUT_MODE (next_insn
, TImode
);
18674 if (can_issue_more
== 0)
18675 can_issue_more
= issue_rate
;
18678 return group_count
;
18681 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
18682 dispatch group boundaries that the scheduler had marked. Pad with nops
18683 any dispatch groups which have vacant issue slots, in order to force the
18684 scheduler's grouping on the processor dispatcher. The function
18685 returns the number of dispatch groups found. */
18688 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
18691 rtx_insn
*insn
, *next_insn
;
18694 int can_issue_more
;
18696 int group_count
= 0;
18698 /* Initialize issue_rate. */
18699 issue_rate
= rs6000_issue_rate ();
18700 can_issue_more
= issue_rate
;
18702 insn
= get_next_active_insn (prev_head_insn
, tail
);
18703 next_insn
= get_next_active_insn (insn
, tail
);
18705 while (insn
!= NULL_RTX
)
18708 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
18710 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
18712 if (next_insn
== NULL_RTX
)
18717 /* If the scheduler had marked group termination at this location
18718 (between insn and next_insn), and neither insn nor next_insn will
18719 force group termination, pad the group with nops to force group
18722 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
18723 && !insn_terminates_group_p (insn
, current_group
)
18724 && !insn_terminates_group_p (next_insn
, previous_group
))
18726 if (!is_branch_slot_insn (next_insn
))
18729 while (can_issue_more
)
18732 emit_insn_before (nop
, next_insn
);
18737 can_issue_more
= issue_rate
;
18742 next_insn
= get_next_active_insn (insn
, tail
);
18745 return group_count
;
18748 /* We're beginning a new block. Initialize data structures as necessary. */
18751 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
18752 int sched_verbose ATTRIBUTE_UNUSED
,
18753 int max_ready ATTRIBUTE_UNUSED
)
18755 last_scheduled_insn
= NULL
;
18756 load_store_pendulum
= 0;
18761 /* The following function is called at the end of scheduling BB.
18762 After reload, it inserts nops at insn group bundling. */
18765 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
18770 fprintf (dump
, "=== Finishing schedule.\n");
18772 if (reload_completed
&& rs6000_sched_groups
)
18774 /* Do not run sched_finish hook when selective scheduling enabled. */
18775 if (sel_sched_p ())
18778 if (rs6000_sched_insert_nops
== sched_finish_none
)
18781 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
18782 n_groups
= pad_groups (dump
, sched_verbose
,
18783 current_sched_info
->prev_head
,
18784 current_sched_info
->next_tail
);
18786 n_groups
= redefine_groups (dump
, sched_verbose
,
18787 current_sched_info
->prev_head
,
18788 current_sched_info
->next_tail
);
18790 if (sched_verbose
>= 6)
18792 fprintf (dump
, "ngroups = %d\n", n_groups
);
18793 print_rtl (dump
, current_sched_info
->prev_head
);
18794 fprintf (dump
, "Done finish_sched\n");
18799 struct rs6000_sched_context
18801 short cached_can_issue_more
;
18802 rtx_insn
*last_scheduled_insn
;
18803 int load_store_pendulum
;
18808 typedef struct rs6000_sched_context rs6000_sched_context_def
;
18809 typedef rs6000_sched_context_def
*rs6000_sched_context_t
;
18811 /* Allocate store for new scheduling context. */
18813 rs6000_alloc_sched_context (void)
18815 return xmalloc (sizeof (rs6000_sched_context_def
));
18818 /* If CLEAN_P is true then initializes _SC with clean data,
18819 and from the global context otherwise. */
18821 rs6000_init_sched_context (void *_sc
, bool clean_p
)
18823 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
18827 sc
->cached_can_issue_more
= 0;
18828 sc
->last_scheduled_insn
= NULL
;
18829 sc
->load_store_pendulum
= 0;
18830 sc
->divide_cnt
= 0;
18831 sc
->vec_pairing
= 0;
18835 sc
->cached_can_issue_more
= cached_can_issue_more
;
18836 sc
->last_scheduled_insn
= last_scheduled_insn
;
18837 sc
->load_store_pendulum
= load_store_pendulum
;
18838 sc
->divide_cnt
= divide_cnt
;
18839 sc
->vec_pairing
= vec_pairing
;
18843 /* Sets the global scheduling context to the one pointed to by _SC. */
18845 rs6000_set_sched_context (void *_sc
)
18847 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
18849 gcc_assert (sc
!= NULL
);
18851 cached_can_issue_more
= sc
->cached_can_issue_more
;
18852 last_scheduled_insn
= sc
->last_scheduled_insn
;
18853 load_store_pendulum
= sc
->load_store_pendulum
;
18854 divide_cnt
= sc
->divide_cnt
;
18855 vec_pairing
= sc
->vec_pairing
;
18860 rs6000_free_sched_context (void *_sc
)
18862 gcc_assert (_sc
!= NULL
);
18868 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
18870 switch (get_attr_type (insn
))
18885 /* Length in units of the trampoline for entering a nested function. */
18888 rs6000_trampoline_size (void)
18892 switch (DEFAULT_ABI
)
18895 gcc_unreachable ();
18898 ret
= (TARGET_32BIT
) ? 12 : 24;
18902 gcc_assert (!TARGET_32BIT
);
18908 ret
= (TARGET_32BIT
) ? 40 : 48;
18915 /* Emit RTL insns to initialize the variable parts of a trampoline.
18916 FNADDR is an RTX for the address of the function's pure code.
18917 CXT is an RTX for the static chain value for the function. */
18920 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
18922 int regsize
= (TARGET_32BIT
) ? 4 : 8;
18923 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
18924 rtx ctx_reg
= force_reg (Pmode
, cxt
);
18925 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
18927 switch (DEFAULT_ABI
)
18930 gcc_unreachable ();
18932 /* Under AIX, just build the 3 word function descriptor */
18935 rtx fnmem
, fn_reg
, toc_reg
;
18937 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
18938 error ("you cannot take the address of a nested function if you use "
18939 "the %qs option", "-mno-pointers-to-nested-functions");
18941 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
18942 fn_reg
= gen_reg_rtx (Pmode
);
18943 toc_reg
= gen_reg_rtx (Pmode
);
18945 /* Macro to shorten the code expansions below. */
18946 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
18948 m_tramp
= replace_equiv_address (m_tramp
, addr
);
18950 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
18951 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
18952 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
18953 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
18954 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
18960 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
18964 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
18965 LCT_NORMAL
, VOIDmode
,
18967 GEN_INT (rs6000_trampoline_size ()), SImode
,
18975 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
18976 identifier as an argument, so the front end shouldn't look it up. */
18979 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
18981 return is_attribute_p ("altivec", attr_id
);
18984 /* Handle the "altivec" attribute. The attribute may have
18985 arguments as follows:
18987 __attribute__((altivec(vector__)))
18988 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
18989 __attribute__((altivec(bool__))) (always followed by 'unsigned')
18991 and may appear more than once (e.g., 'vector bool char') in a
18992 given declaration. */
18995 rs6000_handle_altivec_attribute (tree
*node
,
18996 tree name ATTRIBUTE_UNUSED
,
18998 int flags ATTRIBUTE_UNUSED
,
18999 bool *no_add_attrs
)
19001 tree type
= *node
, result
= NULL_TREE
;
19005 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
19006 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
19007 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
19010 while (POINTER_TYPE_P (type
)
19011 || TREE_CODE (type
) == FUNCTION_TYPE
19012 || TREE_CODE (type
) == METHOD_TYPE
19013 || TREE_CODE (type
) == ARRAY_TYPE
)
19014 type
= TREE_TYPE (type
);
19016 mode
= TYPE_MODE (type
);
19018 /* Check for invalid AltiVec type qualifiers. */
19019 if (type
== long_double_type_node
)
19020 error ("use of %<long double%> in AltiVec types is invalid");
19021 else if (type
== boolean_type_node
)
19022 error ("use of boolean types in AltiVec types is invalid");
19023 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
19024 error ("use of %<complex%> in AltiVec types is invalid");
19025 else if (DECIMAL_FLOAT_MODE_P (mode
))
19026 error ("use of decimal floating point types in AltiVec types is invalid");
19027 else if (!TARGET_VSX
)
19029 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
19032 error ("use of %<long%> in AltiVec types is invalid for "
19033 "64-bit code without %qs", "-mvsx");
19034 else if (rs6000_warn_altivec_long
)
19035 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19038 else if (type
== long_long_unsigned_type_node
19039 || type
== long_long_integer_type_node
)
19040 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19042 else if (type
== double_type_node
)
19043 error ("use of %<double%> in AltiVec types is invalid without %qs",
19047 switch (altivec_type
)
19050 unsigned_p
= TYPE_UNSIGNED (type
);
19054 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
19057 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
19060 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
19063 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
19066 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
19068 case E_SFmode
: result
= V4SF_type_node
; break;
19069 case E_DFmode
: result
= V2DF_type_node
; break;
19070 /* If the user says 'vector int bool', we may be handed the 'bool'
19071 attribute _before_ the 'vector' attribute, and so select the
19072 proper type in the 'b' case below. */
19073 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
19074 case E_V2DImode
: case E_V2DFmode
:
19082 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
19083 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
19084 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
19085 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
19092 case E_V8HImode
: result
= pixel_V8HI_type_node
;
19098 /* Propagate qualifiers attached to the element type
19099 onto the vector type. */
19100 if (result
&& result
!= type
&& TYPE_QUALS (type
))
19101 result
= build_qualified_type (result
, TYPE_QUALS (type
));
19103 *no_add_attrs
= true; /* No need to hang on to the attribute. */
19106 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
19111 /* AltiVec defines five built-in scalar types that serve as vector
19112 elements; we must teach the compiler how to mangle them. The 128-bit
19113 floating point mangling is target-specific as well. */
19115 static const char *
19116 rs6000_mangle_type (const_tree type
)
19118 type
= TYPE_MAIN_VARIANT (type
);
19120 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
19121 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
19124 if (type
== bool_char_type_node
) return "U6__boolc";
19125 if (type
== bool_short_type_node
) return "U6__bools";
19126 if (type
== pixel_type_node
) return "u7__pixel";
19127 if (type
== bool_int_type_node
) return "U6__booli";
19128 if (type
== bool_long_long_type_node
) return "U6__boolx";
19130 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IBM_P (TYPE_MODE (type
)))
19132 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IEEE_P (TYPE_MODE (type
)))
19133 return ieee128_mangling_gcc_8_1
? "U10__float128" : "u9__ieee128";
19135 /* For all other types, use the default mangling. */
19139 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19140 struct attribute_spec.handler. */
19143 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
19144 tree args ATTRIBUTE_UNUSED
,
19145 int flags ATTRIBUTE_UNUSED
,
19146 bool *no_add_attrs
)
19148 if (TREE_CODE (*node
) != FUNCTION_TYPE
19149 && TREE_CODE (*node
) != FIELD_DECL
19150 && TREE_CODE (*node
) != TYPE_DECL
)
19152 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
19154 *no_add_attrs
= true;
19160 /* Set longcall attributes on all functions declared when
19161 rs6000_default_long_calls is true. */
19163 rs6000_set_default_type_attributes (tree type
)
19165 if (rs6000_default_long_calls
19166 && (TREE_CODE (type
) == FUNCTION_TYPE
19167 || TREE_CODE (type
) == METHOD_TYPE
))
19168 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
19170 TYPE_ATTRIBUTES (type
));
19173 darwin_set_default_type_attributes (type
);
19177 /* Return a reference suitable for calling a function with the
19178 longcall attribute. */
19181 rs6000_longcall_ref (rtx call_ref
, rtx arg
)
19183 /* System V adds '.' to the internal name, so skip them. */
19184 const char *call_name
= XSTR (call_ref
, 0);
19185 if (*call_name
== '.')
19187 while (*call_name
== '.')
19190 tree node
= get_identifier (call_name
);
19191 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
19196 rtx base
= const0_rtx
;
19198 if (rs6000_pcrel_p (cfun
))
19200 rtx reg
= gen_rtx_REG (Pmode
, regno
);
19201 rtx u
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, base
, call_ref
, arg
),
19203 emit_insn (gen_rtx_SET (reg
, u
));
19207 if (DEFAULT_ABI
== ABI_ELFv2
)
19208 base
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
19212 base
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
19215 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19216 may be used by a function global entry point. For SysV4, r11
19217 is used by __glink_PLTresolve lazy resolver entry. */
19218 rtx reg
= gen_rtx_REG (Pmode
, regno
);
19219 rtx hi
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, base
, call_ref
, arg
),
19221 rtx lo
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, reg
, call_ref
, arg
),
19223 emit_insn (gen_rtx_SET (reg
, hi
));
19224 emit_insn (gen_rtx_SET (reg
, lo
));
19228 return force_reg (Pmode
, call_ref
);
19231 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19232 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19235 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19236 struct attribute_spec.handler. */
19238 rs6000_handle_struct_attribute (tree
*node
, tree name
,
19239 tree args ATTRIBUTE_UNUSED
,
19240 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19243 if (DECL_P (*node
))
19245 if (TREE_CODE (*node
) == TYPE_DECL
)
19246 type
= &TREE_TYPE (*node
);
19251 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19252 || TREE_CODE (*type
) == UNION_TYPE
)))
19254 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
19255 *no_add_attrs
= true;
19258 else if ((is_attribute_p ("ms_struct", name
)
19259 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19260 || ((is_attribute_p ("gcc_struct", name
)
19261 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19263 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
19265 *no_add_attrs
= true;
19272 rs6000_ms_bitfield_layout_p (const_tree record_type
)
19274 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
19275 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19276 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19279 #ifdef USING_ELFOS_H
19281 /* A get_unnamed_section callback, used for switching to toc_section. */
19284 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
19286 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
19287 && TARGET_MINIMAL_TOC
)
19289 if (!toc_initialized
)
19291 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
19292 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
19293 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
19294 fprintf (asm_out_file
, "\t.tc ");
19295 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
19296 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
19297 fprintf (asm_out_file
, "\n");
19299 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
19300 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
19301 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
19302 fprintf (asm_out_file
, " = .+32768\n");
19303 toc_initialized
= 1;
19306 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
19308 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
19310 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
19311 if (!toc_initialized
)
19313 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
19314 toc_initialized
= 1;
19319 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
19320 if (!toc_initialized
)
19322 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
19323 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
19324 fprintf (asm_out_file
, " = .+32768\n");
19325 toc_initialized
= 1;
19330 /* Implement TARGET_ASM_INIT_SECTIONS. */
19333 rs6000_elf_asm_init_sections (void)
19336 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
19339 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
19340 SDATA2_SECTION_ASM_OP
);
19343 /* Implement TARGET_SELECT_RTX_SECTION. */
19346 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
19347 unsigned HOST_WIDE_INT align
)
19349 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
19350 return toc_section
;
19352 return default_elf_select_rtx_section (mode
, x
, align
);
19355 /* For a SYMBOL_REF, set generic flags and then perform some
19356 target-specific processing.
19358 When the AIX ABI is requested on a non-AIX system, replace the
19359 function name with the real name (with a leading .) rather than the
19360 function descriptor name. This saves a lot of overriding code to
19361 read the prefixes. */
19363 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
19365 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
19367 default_encode_section_info (decl
, rtl
, first
);
19370 && TREE_CODE (decl
) == FUNCTION_DECL
19372 && DEFAULT_ABI
== ABI_AIX
)
19374 rtx sym_ref
= XEXP (rtl
, 0);
19375 size_t len
= strlen (XSTR (sym_ref
, 0));
19376 char *str
= XALLOCAVEC (char, len
+ 2);
19378 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
19379 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
/* Return true if SECTION names the section TEMPL itself or a
   subsection of it (i.e. TEMPL followed by '.').  */
static bool
compare_section_name (const char *section, const char *templ)
{
  int len;

  len = strlen (templ);
  return (strncmp (section, templ, len) == 0
	  && (section[len] == 0 || section[len] == '.'));
}
19394 rs6000_elf_in_small_data_p (const_tree decl
)
19396 if (rs6000_sdata
== SDATA_NONE
)
19399 /* We want to merge strings, so we never consider them small data. */
19400 if (TREE_CODE (decl
) == STRING_CST
)
19403 /* Functions are never in the small data area. */
19404 if (TREE_CODE (decl
) == FUNCTION_DECL
)
19407 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_SECTION_NAME (decl
))
19409 const char *section
= DECL_SECTION_NAME (decl
);
19410 if (compare_section_name (section
, ".sdata")
19411 || compare_section_name (section
, ".sdata2")
19412 || compare_section_name (section
, ".gnu.linkonce.s")
19413 || compare_section_name (section
, ".sbss")
19414 || compare_section_name (section
, ".sbss2")
19415 || compare_section_name (section
, ".gnu.linkonce.sb")
19416 || strcmp (section
, ".PPC.EMB.sdata0") == 0
19417 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
19422 /* If we are told not to put readonly data in sdata, then don't. */
19423 if (TREE_READONLY (decl
) && rs6000_sdata
!= SDATA_EABI
19424 && !rs6000_readonly_in_sdata
)
19427 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
19430 && size
<= g_switch_value
19431 /* If it's not public, and we're not going to reference it there,
19432 there's no need to put it in the small data section. */
19433 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
19440 #endif /* USING_ELFOS_H */
19442 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19445 rs6000_use_blocks_for_constant_p (machine_mode mode
, const_rtx x
)
19447 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
);
19450 /* Do not place thread-local symbols refs in the object blocks. */
19453 rs6000_use_blocks_for_decl_p (const_tree decl
)
19455 return !DECL_THREAD_LOCAL_P (decl
);
19458 /* Return a REG that occurs in ADDR with coefficient 1.
19459 ADDR can be effectively incremented by incrementing REG.
19461 r0 is special and we must not select it as an address
19462 register by this routine since our caller will try to
19463 increment the returned register via an "la" instruction. */
19466 find_addr_reg (rtx addr
)
19468 while (GET_CODE (addr
) == PLUS
)
19470 if (REG_P (XEXP (addr
, 0))
19471 && REGNO (XEXP (addr
, 0)) != 0)
19472 addr
= XEXP (addr
, 0);
19473 else if (REG_P (XEXP (addr
, 1))
19474 && REGNO (XEXP (addr
, 1)) != 0)
19475 addr
= XEXP (addr
, 1);
19476 else if (CONSTANT_P (XEXP (addr
, 0)))
19477 addr
= XEXP (addr
, 1);
19478 else if (CONSTANT_P (XEXP (addr
, 1)))
19479 addr
= XEXP (addr
, 0);
19481 gcc_unreachable ();
19483 gcc_assert (REG_P (addr
) && REGNO (addr
) != 0);
19488 rs6000_fatal_bad_address (rtx op
)
19490 fatal_insn ("bad address", op
);
19495 vec
<branch_island
, va_gc
> *branch_islands
;
19497 /* Remember to generate a branch island for far calls to the given
19501 add_compiler_branch_island (tree label_name
, tree function_name
,
19504 branch_island bi
= {function_name
, label_name
, line_number
};
19505 vec_safe_push (branch_islands
, bi
);
19508 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
19509 already there or not. */
19512 no_previous_def (tree function_name
)
19517 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
19518 if (function_name
== bi
->function_name
)
19523 /* GET_PREV_LABEL gets the label name from the previous definition of
19527 get_prev_label (tree function_name
)
19532 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
19533 if (function_name
== bi
->function_name
)
19534 return bi
->label_name
;
19538 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19541 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19543 unsigned int length
;
19544 char *symbol_name
, *lazy_ptr_name
;
19545 char *local_label_0
;
19546 static unsigned label
= 0;
19548 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19549 symb
= (*targetm
.strip_name_encoding
) (symb
);
19551 length
= strlen (symb
);
19552 symbol_name
= XALLOCAVEC (char, length
+ 32);
19553 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19555 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
19556 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
19560 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
19561 fprintf (file
, "\t.align 5\n");
19563 fprintf (file
, "%s:\n", stub
);
19564 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19567 local_label_0
= XALLOCAVEC (char, 16);
19568 sprintf (local_label_0
, "L%u$spb", label
);
19570 fprintf (file
, "\tmflr r0\n");
19571 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
19572 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
19573 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
19574 lazy_ptr_name
, local_label_0
);
19575 fprintf (file
, "\tmtlr r0\n");
19576 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
19577 (TARGET_64BIT
? "ldu" : "lwzu"),
19578 lazy_ptr_name
, local_label_0
);
19579 fprintf (file
, "\tmtctr r12\n");
19580 fprintf (file
, "\tbctr\n");
19582 else /* mdynamic-no-pic or mkernel. */
19584 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
19585 fprintf (file
, "\t.align 4\n");
19587 fprintf (file
, "%s:\n", stub
);
19588 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19590 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
19591 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
19592 (TARGET_64BIT
? "ldu" : "lwzu"),
19594 fprintf (file
, "\tmtctr r12\n");
19595 fprintf (file
, "\tbctr\n");
19598 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19599 fprintf (file
, "%s:\n", lazy_ptr_name
);
19600 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19601 fprintf (file
, "%sdyld_stub_binding_helper\n",
19602 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
19605 /* Legitimize PIC addresses. If the address is already
19606 position-independent, we return ORIG. Newly generated
19607 position-independent addresses go into a reg. This is REG if non
19608 zero, otherwise we allocate register(s) as necessary. */
19610 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
19613 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
19618 if (reg
== NULL
&& !reload_completed
)
19619 reg
= gen_reg_rtx (Pmode
);
19621 if (GET_CODE (orig
) == CONST
)
19625 if (GET_CODE (XEXP (orig
, 0)) == PLUS
19626 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
19629 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
19631 /* Use a different reg for the intermediate value, as
19632 it will be marked UNCHANGING. */
19633 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
19634 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
19637 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
19640 if (CONST_INT_P (offset
))
19642 if (SMALL_INT (offset
))
19643 return plus_constant (Pmode
, base
, INTVAL (offset
));
19644 else if (!reload_completed
)
19645 offset
= force_reg (Pmode
, offset
);
19648 rtx mem
= force_const_mem (Pmode
, orig
);
19649 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
19652 return gen_rtx_PLUS (Pmode
, base
, offset
);
19655 /* Fall back on generic machopic code. */
19656 return machopic_legitimize_pic_address (orig
, mode
, reg
);
19659 /* Output a .machine directive for the Darwin assembler, and call
19660 the generic start_file routine. */
19663 rs6000_darwin_file_start (void)
19665 static const struct
19669 HOST_WIDE_INT if_set
;
19671 { "ppc64", "ppc64", MASK_64BIT
},
19672 { "970", "ppc970", MASK_PPC_GPOPT
| MASK_MFCRF
| MASK_POWERPC64
},
19673 { "power4", "ppc970", 0 },
19674 { "G5", "ppc970", 0 },
19675 { "7450", "ppc7450", 0 },
19676 { "7400", "ppc7400", MASK_ALTIVEC
},
19677 { "G4", "ppc7400", 0 },
19678 { "750", "ppc750", 0 },
19679 { "740", "ppc750", 0 },
19680 { "G3", "ppc750", 0 },
19681 { "604e", "ppc604e", 0 },
19682 { "604", "ppc604", 0 },
19683 { "603e", "ppc603", 0 },
19684 { "603", "ppc603", 0 },
19685 { "601", "ppc601", 0 },
19686 { NULL
, "ppc", 0 } };
19687 const char *cpu_id
= "";
19690 rs6000_file_start ();
19691 darwin_file_start ();
19693 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19695 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
19696 cpu_id
= rs6000_default_cpu
;
19698 if (global_options_set
.x_rs6000_cpu_index
)
19699 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
19701 /* Look through the mapping array. Pick the first name that either
19702 matches the argument, has a bit set in IF_SET that is also set
19703 in the target flags, or has a NULL name. */
19706 while (mapping
[i
].arg
!= NULL
19707 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
19708 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
19711 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
19714 #endif /* TARGET_MACHO */
19718 rs6000_elf_reloc_rw_mask (void)
19722 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
19728 /* Record an element in the table of global constructors. SYMBOL is
19729 a SYMBOL_REF of the function to be called; PRIORITY is a number
19730 between 0 and MAX_INIT_PRIORITY.
19732 This differs from default_named_section_asm_out_constructor in
19733 that we have special handling for -mrelocatable. */
19735 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
19737 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
19739 const char *section
= ".ctors";
19742 if (priority
!= DEFAULT_INIT_PRIORITY
)
19744 sprintf (buf
, ".ctors.%.5u",
19745 /* Invert the numbering so the linker puts us in the proper
19746 order; constructors are run from right to left, and the
19747 linker sorts in increasing order. */
19748 MAX_INIT_PRIORITY
- priority
);
19752 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
19753 assemble_align (POINTER_SIZE
);
19755 if (DEFAULT_ABI
== ABI_V4
19756 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
19758 fputs ("\t.long (", asm_out_file
);
19759 output_addr_const (asm_out_file
, symbol
);
19760 fputs (")@fixup\n", asm_out_file
);
19763 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
19766 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
19768 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
19770 const char *section
= ".dtors";
19773 if (priority
!= DEFAULT_INIT_PRIORITY
)
19775 sprintf (buf
, ".dtors.%.5u",
19776 /* Invert the numbering so the linker puts us in the proper
19777 order; constructors are run from right to left, and the
19778 linker sorts in increasing order. */
19779 MAX_INIT_PRIORITY
- priority
);
19783 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
19784 assemble_align (POINTER_SIZE
);
19786 if (DEFAULT_ABI
== ABI_V4
19787 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
19789 fputs ("\t.long (", asm_out_file
);
19790 output_addr_const (asm_out_file
, symbol
);
19791 fputs (")@fixup\n", asm_out_file
);
19794 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
19798 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
19800 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
19802 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
19803 ASM_OUTPUT_LABEL (file
, name
);
19804 fputs (DOUBLE_INT_ASM_OP
, file
);
19805 rs6000_output_function_entry (file
, name
);
19806 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
19809 fputs ("\t.size\t", file
);
19810 assemble_name (file
, name
);
19811 fputs (",24\n\t.type\t.", file
);
19812 assemble_name (file
, name
);
19813 fputs (",@function\n", file
);
19814 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
19816 fputs ("\t.globl\t.", file
);
19817 assemble_name (file
, name
);
19822 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
19823 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
19824 rs6000_output_function_entry (file
, name
);
19825 fputs (":\n", file
);
19830 if (DEFAULT_ABI
== ABI_V4
19831 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
19832 && !TARGET_SECURE_PLT
19833 && (!constant_pool_empty_p () || crtl
->profile
)
19834 && (uses_toc
= uses_TOC ()))
19839 switch_to_other_text_partition ();
19840 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
19842 fprintf (file
, "\t.long ");
19843 assemble_name (file
, toc_label_name
);
19846 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
19847 assemble_name (file
, buf
);
19850 switch_to_other_text_partition ();
19853 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
19854 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
19856 if (TARGET_CMODEL
== CMODEL_LARGE
19857 && rs6000_global_entry_point_prologue_needed_p ())
19861 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
19863 fprintf (file
, "\t.quad .TOC.-");
19864 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
19865 assemble_name (file
, buf
);
19869 if (DEFAULT_ABI
== ABI_AIX
)
19871 const char *desc_name
, *orig_name
;
19873 orig_name
= (*targetm
.strip_name_encoding
) (name
);
19874 desc_name
= orig_name
;
19875 while (*desc_name
== '.')
19878 if (TREE_PUBLIC (decl
))
19879 fprintf (file
, "\t.globl %s\n", desc_name
);
19881 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
19882 fprintf (file
, "%s:\n", desc_name
);
19883 fprintf (file
, "\t.long %s\n", orig_name
);
19884 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
19885 fputs ("\t.long 0\n", file
);
19886 fprintf (file
, "\t.previous\n");
19888 ASM_OUTPUT_LABEL (file
, name
);
19891 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
19893 rs6000_elf_file_end (void)
19895 #ifdef HAVE_AS_GNU_ATTRIBUTE
19896 /* ??? The value emitted depends on options active at file end.
19897 Assume anyone using #pragma or attributes that might change
19898 options knows what they are doing. */
19899 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
19900 && rs6000_passes_float
)
19904 if (TARGET_HARD_FLOAT
)
19908 if (rs6000_passes_long_double
)
19910 if (!TARGET_LONG_DOUBLE_128
)
19912 else if (TARGET_IEEEQUAD
)
19917 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
19919 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
19921 if (rs6000_passes_vector
)
19922 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
19923 (TARGET_ALTIVEC_ABI
? 2 : 1));
19924 if (rs6000_returns_struct
)
19925 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
19926 aix_struct_return
? 2 : 1);
19929 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
19930 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
19931 file_end_indicate_exec_stack ();
19934 if (flag_split_stack
)
19935 file_end_indicate_split_stack ();
19939 /* We have expanded a CPU builtin, so we need to emit a reference to
19940 the special symbol that LIBC uses to declare it supports the
19941 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
19942 switch_to_section (data_section
);
19943 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
19944 fprintf (asm_out_file
, "\t%s %s\n",
19945 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
19952 #ifndef HAVE_XCOFF_DWARF_EXTRAS
19953 #define HAVE_XCOFF_DWARF_EXTRAS 0
19956 static enum unwind_info_type
19957 rs6000_xcoff_debug_unwind_info (void)
19963 rs6000_xcoff_asm_output_anchor (rtx symbol
)
19967 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
19968 SYMBOL_REF_BLOCK_OFFSET (symbol
));
19969 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
19970 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
19971 fprintf (asm_out_file
, ",");
19972 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
19973 fprintf (asm_out_file
, "\n");
19977 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
19979 fputs (GLOBAL_ASM_OP
, stream
);
19980 RS6000_OUTPUT_BASENAME (stream
, name
);
19981 putc ('\n', stream
);
19984 /* A get_unnamed_decl callback, used for read-only sections. PTR
19985 points to the section string variable. */
19988 rs6000_xcoff_output_readonly_section_asm_op (const void *directive
)
19990 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
19991 *(const char *const *) directive
,
19992 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
19995 /* Likewise for read-write sections. */
19998 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive
)
20000 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
20001 *(const char *const *) directive
,
20002 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
20006 rs6000_xcoff_output_tls_section_asm_op (const void *directive
)
20008 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
20009 *(const char *const *) directive
,
20010 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
20013 /* A get_unnamed_section callback, used for switching to toc_section. */
20016 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
20018 if (TARGET_MINIMAL_TOC
)
20020 /* toc_section is always selected at least once from
20021 rs6000_xcoff_file_start, so this is guaranteed to
20022 always be defined once and only once in each file. */
20023 if (!toc_initialized
)
20025 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
20026 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
20027 toc_initialized
= 1;
20029 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
20030 (TARGET_32BIT
? "" : ",3"));
20033 fputs ("\t.toc\n", asm_out_file
);
20036 /* Implement TARGET_ASM_INIT_SECTIONS. */
20039 rs6000_xcoff_asm_init_sections (void)
20041 read_only_data_section
20042 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
20043 &xcoff_read_only_section_name
);
20045 private_data_section
20046 = get_unnamed_section (SECTION_WRITE
,
20047 rs6000_xcoff_output_readwrite_section_asm_op
,
20048 &xcoff_private_data_section_name
);
20050 read_only_private_data_section
20051 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
20052 &xcoff_private_rodata_section_name
);
20055 = get_unnamed_section (SECTION_TLS
,
20056 rs6000_xcoff_output_tls_section_asm_op
,
20057 &xcoff_tls_data_section_name
);
20059 tls_private_data_section
20060 = get_unnamed_section (SECTION_TLS
,
20061 rs6000_xcoff_output_tls_section_asm_op
,
20062 &xcoff_private_data_section_name
);
20065 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
20067 readonly_data_section
= read_only_data_section
;
/* Implement TARGET_ASM_RELOC_RW_MASK.  AIX is always PIC, so any
   relocation forces the section to be treated as writable.  */
static int
rs6000_xcoff_reloc_rw_mask (void)
{
  return 3;
}
20077 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
20078 tree decl ATTRIBUTE_UNUSED
)
20081 static const char * const suffix
[5] = { "PR", "RO", "RW", "TL", "XO" };
20083 if (flags
& SECTION_EXCLUDE
)
20085 else if (flags
& SECTION_DEBUG
)
20087 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
20090 else if (flags
& SECTION_CODE
)
20092 else if (flags
& SECTION_TLS
)
20094 else if (flags
& SECTION_WRITE
)
20099 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
20100 (flags
& SECTION_CODE
) ? "." : "",
20101 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
20104 #define IN_NAMED_SECTION(DECL) \
20105 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20106 && DECL_SECTION_NAME (DECL) != NULL)
20109 rs6000_xcoff_select_section (tree decl
, int reloc
,
20110 unsigned HOST_WIDE_INT align
)
20112 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20114 if (align
> BIGGEST_ALIGNMENT
)
20116 resolve_unique_section (decl
, reloc
, true);
20117 if (IN_NAMED_SECTION (decl
))
20118 return get_named_section (decl
, NULL
, reloc
);
20121 if (decl_readonly_section (decl
, reloc
))
20123 if (TREE_PUBLIC (decl
))
20124 return read_only_data_section
;
20126 return read_only_private_data_section
;
20131 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
20133 if (TREE_PUBLIC (decl
))
20134 return tls_data_section
;
20135 else if (bss_initializer_p (decl
))
20137 /* Convert to COMMON to emit in BSS. */
20138 DECL_COMMON (decl
) = 1;
20139 return tls_comm_section
;
20142 return tls_private_data_section
;
20146 if (TREE_PUBLIC (decl
))
20147 return data_section
;
20149 return private_data_section
;
20154 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
20158 /* Use select_section for private data and uninitialized data with
20159 alignment <= BIGGEST_ALIGNMENT. */
20160 if (!TREE_PUBLIC (decl
)
20161 || DECL_COMMON (decl
)
20162 || (DECL_INITIAL (decl
) == NULL_TREE
20163 && DECL_ALIGN (decl
) <= BIGGEST_ALIGNMENT
)
20164 || DECL_INITIAL (decl
) == error_mark_node
20165 || (flag_zero_initialized_in_bss
20166 && initializer_zerop (DECL_INITIAL (decl
))))
20169 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
20170 name
= (*targetm
.strip_name_encoding
) (name
);
20171 set_decl_section_name (decl
, name
);
20174 /* Select section for constant in constant pool.
20176 On RS/6000, all constants are in the private read-only data area.
20177 However, if this is being placed in the TOC it must be output as a
20181 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
20182 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
20184 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
20185 return toc_section
;
20187 return read_only_private_data_section
;
20190 /* Remove any trailing [DS] or the like from the symbol name. */
20192 static const char *
20193 rs6000_xcoff_strip_name_encoding (const char *name
)
20198 len
= strlen (name
);
20199 if (name
[len
- 1] == ']')
20200 return ggc_alloc_string (name
, len
- 4);
20205 /* Section attributes. AIX is always PIC. */
20207 static unsigned int
20208 rs6000_xcoff_section_type_flags (tree decl
, const char *name
, int reloc
)
20210 unsigned int align
;
20211 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
20213 /* Align to at least UNIT size. */
20214 if ((flags
& SECTION_CODE
) != 0 || !decl
|| !DECL_P (decl
))
20215 align
= MIN_UNITS_PER_WORD
;
20217 /* Increase alignment of large objects if not already stricter. */
20218 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
),
20219 int_size_in_bytes (TREE_TYPE (decl
)) > MIN_UNITS_PER_WORD
20220 ? UNITS_PER_FP_WORD
: MIN_UNITS_PER_WORD
);
20222 return flags
| (exact_log2 (align
) & SECTION_ENTSIZE
);
20225 /* Output at beginning of assembler file.
20227 Initialize the section names for the RS/6000 at this point.
20229 Specify filename, including full path, to assembler.
20231 We want to go into the TOC section so at least one .toc will be emitted.
20232 Also, in order to output proper .bs/.es pairs, we need at least one static
20233 [RW] section emitted.
20235 Finally, declare mcount when profiling to make the assembler happy. */
20238 rs6000_xcoff_file_start (void)
20240 rs6000_gen_section_name (&xcoff_bss_section_name
,
20241 main_input_filename
, ".bss_");
20242 rs6000_gen_section_name (&xcoff_private_data_section_name
,
20243 main_input_filename
, ".rw_");
20244 rs6000_gen_section_name (&xcoff_private_rodata_section_name
,
20245 main_input_filename
, ".rop_");
20246 rs6000_gen_section_name (&xcoff_read_only_section_name
,
20247 main_input_filename
, ".ro_");
20248 rs6000_gen_section_name (&xcoff_tls_data_section_name
,
20249 main_input_filename
, ".tls_");
20250 rs6000_gen_section_name (&xcoff_tbss_section_name
,
20251 main_input_filename
, ".tbss_[UL]");
20253 fputs ("\t.file\t", asm_out_file
);
20254 output_quoted_string (asm_out_file
, main_input_filename
);
20255 fputc ('\n', asm_out_file
);
20256 if (write_symbols
!= NO_DEBUG
)
20257 switch_to_section (private_data_section
);
20258 switch_to_section (toc_section
);
20259 switch_to_section (text_section
);
20261 fprintf (asm_out_file
, "\t.extern %s\n", RS6000_MCOUNT
);
20262 rs6000_file_start ();
20265 /* Output at end of assembler file.
20266 On the RS/6000, referencing data should automatically pull in text. */
20269 rs6000_xcoff_file_end (void)
20271 switch_to_section (text_section
);
20272 fputs ("_section_.text:\n", asm_out_file
);
20273 switch_to_section (data_section
);
20274 fputs (TARGET_32BIT
20275 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20279 struct declare_alias_data
20282 bool function_descriptor
;
20285 /* Declare alias N. A helper function for for_node_and_aliases. */
20288 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
20290 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
20291 /* Main symbol is output specially, because varasm machinery does part of
20292 the job for us - we do not need to declare .globl/lglobs and such. */
20293 if (!n
->alias
|| n
->weakref
)
20296 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
20299 /* Prevent assemble_alias from trying to use .set pseudo operation
20300 that does not behave as expected by the middle-end. */
20301 TREE_ASM_WRITTEN (n
->decl
) = true;
20303 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
20304 char *buffer
= (char *) alloca (strlen (name
) + 2);
20306 int dollar_inside
= 0;
20308 strcpy (buffer
, name
);
20309 p
= strchr (buffer
, '$');
20313 p
= strchr (p
+ 1, '$');
20315 if (TREE_PUBLIC (n
->decl
))
20317 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
20319 if (dollar_inside
) {
20320 if (data
->function_descriptor
)
20321 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
20322 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
20324 if (data
->function_descriptor
)
20326 fputs ("\t.globl .", data
->file
);
20327 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
20328 putc ('\n', data
->file
);
20330 fputs ("\t.globl ", data
->file
);
20331 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
20332 putc ('\n', data
->file
);
20334 #ifdef ASM_WEAKEN_DECL
20335 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
20336 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
20343 if (data
->function_descriptor
)
20344 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
20345 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
20347 if (data
->function_descriptor
)
20349 fputs ("\t.lglobl .", data
->file
);
20350 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
20351 putc ('\n', data
->file
);
20353 fputs ("\t.lglobl ", data
->file
);
20354 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
20355 putc ('\n', data
->file
);
20357 if (data
->function_descriptor
)
20358 fputs (".", data
->file
);
20359 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
20360 fputs (":\n", data
->file
);
#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  static const char * const visibility_types[] = {
    "", ",protected", ",hidden", ",internal"
  };

  enum symbol_visibility vis = DECL_VISIBILITY (decl);
  return visibility_types[vis];
}
#endif
20382 /* This macro produces the initial definition of a function name.
20383 On the RS/6000, we need to place an extra '.' in the function name and
20384 output the function descriptor.
20385 Dollar signs are converted to underscores.
20387 The csect for the function will have already been created when
20388 text_section was selected. We do have to go back to that csect, however.
20390 The third and fourth parameters to the .function pseudo-op (16 and 044)
20391 are placeholders which no longer have any use.
20393 Because AIX assembler's .set command has unexpected semantics, we output
20394 all aliases as alternative labels in front of the definition. */
20397 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
20399 char *buffer
= (char *) alloca (strlen (name
) + 1);
20401 int dollar_inside
= 0;
20402 struct declare_alias_data data
= {file
, false};
20404 strcpy (buffer
, name
);
20405 p
= strchr (buffer
, '$');
20409 p
= strchr (p
+ 1, '$');
20411 if (TREE_PUBLIC (decl
))
20413 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
20415 if (dollar_inside
) {
20416 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
20417 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
20419 fputs ("\t.globl .", file
);
20420 RS6000_OUTPUT_BASENAME (file
, buffer
);
20421 #ifdef HAVE_GAS_HIDDEN
20422 fputs (rs6000_xcoff_visibility (decl
), file
);
20429 if (dollar_inside
) {
20430 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
20431 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
20433 fputs ("\t.lglobl .", file
);
20434 RS6000_OUTPUT_BASENAME (file
, buffer
);
20437 fputs ("\t.csect ", file
);
20438 RS6000_OUTPUT_BASENAME (file
, buffer
);
20439 fputs (TARGET_32BIT
? "[DS]\n" : "[DS],3\n", file
);
20440 RS6000_OUTPUT_BASENAME (file
, buffer
);
20441 fputs (":\n", file
);
20442 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
20444 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
20445 RS6000_OUTPUT_BASENAME (file
, buffer
);
20446 fputs (", TOC[tc0], 0\n", file
);
20448 switch_to_section (function_section (decl
));
20450 RS6000_OUTPUT_BASENAME (file
, buffer
);
20451 fputs (":\n", file
);
20452 data
.function_descriptor
= true;
20453 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
20455 if (!DECL_IGNORED_P (decl
))
20457 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
20458 xcoffout_declare_function (file
, decl
, buffer
);
20459 else if (write_symbols
== DWARF2_DEBUG
)
20461 name
= (*targetm
.strip_name_encoding
) (name
);
20462 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
20469 /* Output assembly language to globalize a symbol from a DECL,
20470 possibly with visibility. */
20473 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
20475 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
20476 fputs (GLOBAL_ASM_OP
, stream
);
20477 RS6000_OUTPUT_BASENAME (stream
, name
);
20478 #ifdef HAVE_GAS_HIDDEN
20479 fputs (rs6000_xcoff_visibility (decl
), stream
);
20481 putc ('\n', stream
);
20484 /* Output assembly language to define a symbol as COMMON from a DECL,
20485 possibly with visibility. */
20488 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
20489 tree decl ATTRIBUTE_UNUSED
,
20491 unsigned HOST_WIDE_INT size
,
20492 unsigned HOST_WIDE_INT align
)
20494 unsigned HOST_WIDE_INT align2
= 2;
20497 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
20501 fputs (COMMON_ASM_OP
, stream
);
20502 RS6000_OUTPUT_BASENAME (stream
, name
);
20505 "," HOST_WIDE_INT_PRINT_UNSIGNED
"," HOST_WIDE_INT_PRINT_UNSIGNED
,
20508 #ifdef HAVE_GAS_HIDDEN
20510 fputs (rs6000_xcoff_visibility (decl
), stream
);
20512 putc ('\n', stream
);
20515 /* This macro produces the initial definition of a object (variable) name.
20516 Because AIX assembler's .set command has unexpected semantics, we output
20517 all aliases as alternative labels in front of the definition. */
20520 rs6000_xcoff_declare_object_name (FILE *file
, const char *name
, tree decl
)
20522 struct declare_alias_data data
= {file
, false};
20523 RS6000_OUTPUT_BASENAME (file
, name
);
20524 fputs (":\n", file
);
20525 symtab_node::get_create (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
20529 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
20532 rs6000_asm_output_dwarf_pcrel (FILE *file
, int size
, const char *label
)
20534 fputs (integer_asm_op (size
, FALSE
), file
);
20535 assemble_name (file
, label
);
20536 fputs ("-$", file
);
20539 /* Output a symbol offset relative to the dbase for the current object.
20540 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20543 __gcc_unwind_dbase is embedded in all executables/libraries through
20544 libgcc/config/rs6000/crtdbase.S. */
20547 rs6000_asm_output_dwarf_datarel (FILE *file
, int size
, const char *label
)
20549 fputs (integer_asm_op (size
, FALSE
), file
);
20550 assemble_name (file
, label
);
20551 fputs("-__gcc_unwind_dbase", file
);
20556 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
20560 const char *symname
;
20562 default_encode_section_info (decl
, rtl
, first
);
20564 /* Careful not to prod global register variables. */
20567 symbol
= XEXP (rtl
, 0);
20568 if (!SYMBOL_REF_P (symbol
))
20571 flags
= SYMBOL_REF_FLAGS (symbol
);
20573 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
20574 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
20576 SYMBOL_REF_FLAGS (symbol
) = flags
;
20578 /* Append mapping class to extern decls. */
20579 symname
= XSTR (symbol
, 0);
20580 if (decl
/* sync condition with assemble_external () */
20581 && DECL_P (decl
) && DECL_EXTERNAL (decl
) && TREE_PUBLIC (decl
)
20582 && ((TREE_CODE (decl
) == VAR_DECL
&& !DECL_THREAD_LOCAL_P (decl
))
20583 || TREE_CODE (decl
) == FUNCTION_DECL
)
20584 && symname
[strlen (symname
) - 1] != ']')
20586 char *newname
= (char *) alloca (strlen (symname
) + 5);
20587 strcpy (newname
, symname
);
20588 strcat (newname
, (TREE_CODE (decl
) == FUNCTION_DECL
20589 ? "[DS]" : "[UA]"));
20590 XSTR (symbol
, 0) = ggc_strdup (newname
);
20593 #endif /* HAVE_AS_TLS */
20594 #endif /* TARGET_XCOFF */
20597 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
20598 const char *name
, const char *val
)
20600 fputs ("\t.weak\t", stream
);
20601 RS6000_OUTPUT_BASENAME (stream
, name
);
20602 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
20603 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
20606 fputs ("[DS]", stream
);
20607 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20609 fputs (rs6000_xcoff_visibility (decl
), stream
);
20611 fputs ("\n\t.weak\t.", stream
);
20612 RS6000_OUTPUT_BASENAME (stream
, name
);
20614 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20616 fputs (rs6000_xcoff_visibility (decl
), stream
);
20618 fputc ('\n', stream
);
20621 #ifdef ASM_OUTPUT_DEF
20622 ASM_OUTPUT_DEF (stream
, name
, val
);
20624 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
20625 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
20627 fputs ("\t.set\t.", stream
);
20628 RS6000_OUTPUT_BASENAME (stream
, name
);
20629 fputs (",.", stream
);
20630 RS6000_OUTPUT_BASENAME (stream
, val
);
20631 fputc ('\n', stream
);
20637 /* Return true if INSN should not be copied. */
20640 rs6000_cannot_copy_insn_p (rtx_insn
*insn
)
20642 return recog_memoized (insn
) >= 0
20643 && get_attr_cannot_copy (insn
);
20646 /* Compute a (partial) cost for rtx X. Return true if the complete
20647 cost has been computed, and false if subexpressions should be
20648 scanned. In either case, *TOTAL contains the cost result. */
20651 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
20652 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
20654 int code
= GET_CODE (x
);
20658 /* On the RS/6000, if it is valid in the insn, it is free. */
20660 if (((outer_code
== SET
20661 || outer_code
== PLUS
20662 || outer_code
== MINUS
)
20663 && (satisfies_constraint_I (x
)
20664 || satisfies_constraint_L (x
)))
20665 || (outer_code
== AND
20666 && (satisfies_constraint_K (x
)
20668 ? satisfies_constraint_L (x
)
20669 : satisfies_constraint_J (x
))))
20670 || ((outer_code
== IOR
|| outer_code
== XOR
)
20671 && (satisfies_constraint_K (x
)
20673 ? satisfies_constraint_L (x
)
20674 : satisfies_constraint_J (x
))))
20675 || outer_code
== ASHIFT
20676 || outer_code
== ASHIFTRT
20677 || outer_code
== LSHIFTRT
20678 || outer_code
== ROTATE
20679 || outer_code
== ROTATERT
20680 || outer_code
== ZERO_EXTRACT
20681 || (outer_code
== MULT
20682 && satisfies_constraint_I (x
))
20683 || ((outer_code
== DIV
|| outer_code
== UDIV
20684 || outer_code
== MOD
|| outer_code
== UMOD
)
20685 && exact_log2 (INTVAL (x
)) >= 0)
20686 || (outer_code
== COMPARE
20687 && (satisfies_constraint_I (x
)
20688 || satisfies_constraint_K (x
)))
20689 || ((outer_code
== EQ
|| outer_code
== NE
)
20690 && (satisfies_constraint_I (x
)
20691 || satisfies_constraint_K (x
)
20693 ? satisfies_constraint_L (x
)
20694 : satisfies_constraint_J (x
))))
20695 || (outer_code
== GTU
20696 && satisfies_constraint_I (x
))
20697 || (outer_code
== LTU
20698 && satisfies_constraint_P (x
)))
20703 else if ((outer_code
== PLUS
20704 && reg_or_add_cint_operand (x
, VOIDmode
))
20705 || (outer_code
== MINUS
20706 && reg_or_sub_cint_operand (x
, VOIDmode
))
20707 || ((outer_code
== SET
20708 || outer_code
== IOR
20709 || outer_code
== XOR
)
20711 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
20713 *total
= COSTS_N_INSNS (1);
20719 case CONST_WIDE_INT
:
20723 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20727 /* When optimizing for size, MEM should be slightly more expensive
20728 than generating address, e.g., (plus (reg) (const)).
20729 L1 cache latency is about two instructions. */
20730 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20731 if (rs6000_slow_unaligned_access (mode
, MEM_ALIGN (x
)))
20732 *total
+= COSTS_N_INSNS (100);
20741 if (FLOAT_MODE_P (mode
))
20742 *total
= rs6000_cost
->fp
;
20744 *total
= COSTS_N_INSNS (1);
20748 if (CONST_INT_P (XEXP (x
, 1))
20749 && satisfies_constraint_I (XEXP (x
, 1)))
20751 if (INTVAL (XEXP (x
, 1)) >= -256
20752 && INTVAL (XEXP (x
, 1)) <= 255)
20753 *total
= rs6000_cost
->mulsi_const9
;
20755 *total
= rs6000_cost
->mulsi_const
;
20757 else if (mode
== SFmode
)
20758 *total
= rs6000_cost
->fp
;
20759 else if (FLOAT_MODE_P (mode
))
20760 *total
= rs6000_cost
->dmul
;
20761 else if (mode
== DImode
)
20762 *total
= rs6000_cost
->muldi
;
20764 *total
= rs6000_cost
->mulsi
;
20768 if (mode
== SFmode
)
20769 *total
= rs6000_cost
->fp
;
20771 *total
= rs6000_cost
->dmul
;
20776 if (FLOAT_MODE_P (mode
))
20778 *total
= mode
== DFmode
? rs6000_cost
->ddiv
20779 : rs6000_cost
->sdiv
;
20786 if (CONST_INT_P (XEXP (x
, 1))
20787 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
20789 if (code
== DIV
|| code
== MOD
)
20791 *total
= COSTS_N_INSNS (2);
20794 *total
= COSTS_N_INSNS (1);
20798 if (GET_MODE (XEXP (x
, 1)) == DImode
)
20799 *total
= rs6000_cost
->divdi
;
20801 *total
= rs6000_cost
->divsi
;
20803 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20804 if (!TARGET_MODULO
&& (code
== MOD
|| code
== UMOD
))
20805 *total
+= COSTS_N_INSNS (2);
20809 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
20813 *total
= COSTS_N_INSNS (4);
20817 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
20821 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
20825 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
20828 *total
= COSTS_N_INSNS (1);
20832 if (CONST_INT_P (XEXP (x
, 1)))
20834 rtx left
= XEXP (x
, 0);
20835 rtx_code left_code
= GET_CODE (left
);
20837 /* rotate-and-mask: 1 insn. */
20838 if ((left_code
== ROTATE
20839 || left_code
== ASHIFT
20840 || left_code
== LSHIFTRT
)
20841 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
20843 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
20844 if (!CONST_INT_P (XEXP (left
, 1)))
20845 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
20846 *total
+= COSTS_N_INSNS (1);
20850 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20851 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
20852 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
20853 || (val
& 0xffff) == val
20854 || (val
& 0xffff0000) == val
20855 || ((val
& 0xffff) == 0 && mode
== SImode
))
20857 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
20858 *total
+= COSTS_N_INSNS (1);
20863 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
20865 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
20866 *total
+= COSTS_N_INSNS (2);
20871 *total
= COSTS_N_INSNS (1);
20876 *total
= COSTS_N_INSNS (1);
20882 *total
= COSTS_N_INSNS (1);
20886 /* The EXTSWSLI instruction is a combined instruction. Don't count both
20887 the sign extend and shift separately within the insn. */
20888 if (TARGET_EXTSWSLI
&& mode
== DImode
20889 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
20890 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
20901 /* Handle mul_highpart. */
20902 if (outer_code
== TRUNCATE
20903 && GET_CODE (XEXP (x
, 0)) == MULT
)
20905 if (mode
== DImode
)
20906 *total
= rs6000_cost
->muldi
;
20908 *total
= rs6000_cost
->mulsi
;
20911 else if (outer_code
== AND
)
20914 *total
= COSTS_N_INSNS (1);
20919 if (MEM_P (XEXP (x
, 0)))
20922 *total
= COSTS_N_INSNS (1);
20928 if (!FLOAT_MODE_P (mode
))
20930 *total
= COSTS_N_INSNS (1);
20936 case UNSIGNED_FLOAT
:
20939 case FLOAT_TRUNCATE
:
20940 *total
= rs6000_cost
->fp
;
20944 if (mode
== DFmode
)
20945 *total
= rs6000_cost
->sfdf_convert
;
20947 *total
= rs6000_cost
->fp
;
20954 *total
= COSTS_N_INSNS (1);
20957 else if (FLOAT_MODE_P (mode
) && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
)
20959 *total
= rs6000_cost
->fp
;
20968 /* Carry bit requires mode == Pmode.
20969 NEG or PLUS already counted so only add one. */
20971 && (outer_code
== NEG
|| outer_code
== PLUS
))
20973 *total
= COSTS_N_INSNS (1);
20981 if (outer_code
== SET
)
20983 if (XEXP (x
, 1) == const0_rtx
)
20985 *total
= COSTS_N_INSNS (2);
20990 *total
= COSTS_N_INSNS (3);
20995 if (outer_code
== COMPARE
)
21009 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
21012 rs6000_debug_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
21013 int opno
, int *total
, bool speed
)
21015 bool ret
= rs6000_rtx_costs (x
, mode
, outer_code
, opno
, total
, speed
);
21018 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21019 "opno = %d, total = %d, speed = %s, x:\n",
21020 ret
? "complete" : "scan inner",
21021 GET_MODE_NAME (mode
),
21022 GET_RTX_NAME (outer_code
),
21025 speed
? "true" : "false");
21033 rs6000_insn_cost (rtx_insn
*insn
, bool speed
)
21035 if (recog_memoized (insn
) < 0)
21038 /* If we are optimizing for size, just use the length. */
21040 return get_attr_length (insn
);
21042 /* Use the cost if provided. */
21043 int cost
= get_attr_cost (insn
);
21047 /* If the insn tells us how many insns there are, use that. Otherwise use
21048 the length/4. Adjust the insn length to remove the extra size that
21049 prefixed instructions take. */
21050 int n
= get_attr_num_insns (insn
);
21053 int length
= get_attr_length (insn
);
21054 if (get_attr_prefixed (insn
) == PREFIXED_YES
)
21057 ADJUST_INSN_LENGTH (insn
, adjust
);
21064 enum attr_type type
= get_attr_type (insn
);
21071 cost
= COSTS_N_INSNS (n
+ 1);
21075 switch (get_attr_size (insn
))
21078 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const9
;
21081 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const
;
21084 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi
;
21087 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->muldi
;
21090 gcc_unreachable ();
21094 switch (get_attr_size (insn
))
21097 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divsi
;
21100 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divdi
;
21103 gcc_unreachable ();
21108 cost
= n
* rs6000_cost
->fp
;
21111 cost
= n
* rs6000_cost
->dmul
;
21114 cost
= n
* rs6000_cost
->sdiv
;
21117 cost
= n
* rs6000_cost
->ddiv
;
21124 cost
= COSTS_N_INSNS (n
+ 2);
21128 cost
= COSTS_N_INSNS (n
);
21134 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21137 rs6000_debug_address_cost (rtx x
, machine_mode mode
,
21138 addr_space_t as
, bool speed
)
21140 int ret
= TARGET_ADDRESS_COST (x
, mode
, as
, speed
);
21142 fprintf (stderr
, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21143 ret
, speed
? "true" : "false");
21150 /* A C expression returning the cost of moving data from a register of class
21151 CLASS1 to one of CLASS2. */
21154 rs6000_register_move_cost (machine_mode mode
,
21155 reg_class_t from
, reg_class_t to
)
21158 reg_class_t rclass
;
21160 if (TARGET_DEBUG_COST
)
21163 /* If we have VSX, we can easily move between FPR or Altivec registers,
21164 otherwise we can only easily move within classes.
21165 Do this first so we give best-case answers for union classes
21166 containing both gprs and vsx regs. */
21167 HARD_REG_SET to_vsx
, from_vsx
;
21168 to_vsx
= reg_class_contents
[to
] & reg_class_contents
[VSX_REGS
];
21169 from_vsx
= reg_class_contents
[from
] & reg_class_contents
[VSX_REGS
];
21170 if (!hard_reg_set_empty_p (to_vsx
)
21171 && !hard_reg_set_empty_p (from_vsx
)
21173 || hard_reg_set_intersect_p (to_vsx
, from_vsx
)))
21175 int reg
= FIRST_FPR_REGNO
;
21177 || (TEST_HARD_REG_BIT (to_vsx
, FIRST_ALTIVEC_REGNO
)
21178 && TEST_HARD_REG_BIT (from_vsx
, FIRST_ALTIVEC_REGNO
)))
21179 reg
= FIRST_ALTIVEC_REGNO
;
21180 ret
= 2 * hard_regno_nregs (reg
, mode
);
21183 /* Moves from/to GENERAL_REGS. */
21184 else if ((rclass
= from
, reg_classes_intersect_p (to
, GENERAL_REGS
))
21185 || (rclass
= to
, reg_classes_intersect_p (from
, GENERAL_REGS
)))
21187 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
21189 if (TARGET_DIRECT_MOVE
)
21191 /* Keep the cost for direct moves above that for within
21192 a register class even if the actual processor cost is
21193 comparable. We do this because a direct move insn
21194 can't be a nop, whereas with ideal register
21195 allocation a move within the same class might turn
21196 out to be a nop. */
21197 if (rs6000_tune
== PROCESSOR_POWER9
21198 || rs6000_tune
== PROCESSOR_FUTURE
)
21199 ret
= 3 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
21201 ret
= 4 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
21202 /* SFmode requires a conversion when moving between gprs
21204 if (mode
== SFmode
)
21208 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
21209 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
21212 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21214 else if (rclass
== CR_REGS
)
21217 /* For those processors that have slow LR/CTR moves, make them more
21218 expensive than memory in order to bias spills to memory .*/
21219 else if ((rs6000_tune
== PROCESSOR_POWER6
21220 || rs6000_tune
== PROCESSOR_POWER7
21221 || rs6000_tune
== PROCESSOR_POWER8
21222 || rs6000_tune
== PROCESSOR_POWER9
)
21223 && reg_class_subset_p (rclass
, SPECIAL_REGS
))
21224 ret
= 6 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
21227 /* A move will cost one instruction per GPR moved. */
21228 ret
= 2 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
21231 /* Everything else has to go through GENERAL_REGS. */
21233 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
21234 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
21236 if (TARGET_DEBUG_COST
)
21238 if (dbg_cost_ctrl
== 1)
21240 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21241 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
21242 reg_class_names
[to
]);
21249 /* A C expressions returning the cost of moving data of MODE from a register to
21253 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
21254 bool in ATTRIBUTE_UNUSED
)
21258 if (TARGET_DEBUG_COST
)
21261 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
21262 ret
= 4 * hard_regno_nregs (0, mode
);
21263 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
21264 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
21265 ret
= 4 * hard_regno_nregs (32, mode
);
21266 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
21267 ret
= 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO
, mode
);
21269 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
21271 if (TARGET_DEBUG_COST
)
21273 if (dbg_cost_ctrl
== 1)
21275 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21276 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
21283 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21285 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21286 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21287 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21288 move cost between GENERAL_REGS and VSX_REGS low.
21290 It might seem reasonable to use a union class. After all, if usage
21291 of vsr is low and gpr high, it might make sense to spill gpr to vsr
21292 rather than memory. However, in cases where register pressure of
21293 both is high, like the cactus_adm spec test, allowing
21294 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21295 the first scheduling pass. This is partly due to an allocno of
21296 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21297 class, which gives too high a pressure for GENERAL_REGS and too low
21298 for VSX_REGS. So, force a choice of the subclass here.
21300 The best class is also the union if GENERAL_REGS and VSX_REGS have
21301 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21302 allocno class, since trying to narrow down the class by regno mode
21303 is prone to error. For example, SImode is allowed in VSX regs and
21304 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21305 it would be wrong to choose an allocno of GENERAL_REGS based on
21309 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED
,
21310 reg_class_t allocno_class
,
21311 reg_class_t best_class
)
21313 switch (allocno_class
)
21315 case GEN_OR_VSX_REGS
:
21316 /* best_class must be a subset of allocno_class. */
21317 gcc_checking_assert (best_class
== GEN_OR_VSX_REGS
21318 || best_class
== GEN_OR_FLOAT_REGS
21319 || best_class
== VSX_REGS
21320 || best_class
== ALTIVEC_REGS
21321 || best_class
== FLOAT_REGS
21322 || best_class
== GENERAL_REGS
21323 || best_class
== BASE_REGS
);
21324 /* Use best_class but choose wider classes when copying from the
21325 wider class to best_class is cheap. This mimics IRA choice
21326 of allocno class. */
21327 if (best_class
== BASE_REGS
)
21328 return GENERAL_REGS
;
21330 && (best_class
== FLOAT_REGS
|| best_class
== ALTIVEC_REGS
))
21338 return allocno_class
;
21341 /* Returns a code for a target-specific builtin that implements
21342 reciprocal of the function, or NULL_TREE if not available. */
21345 rs6000_builtin_reciprocal (tree fndecl
)
21347 switch (DECL_MD_FUNCTION_CODE (fndecl
))
21349 case VSX_BUILTIN_XVSQRTDP
:
21350 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode
))
21353 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_2DF
];
21355 case VSX_BUILTIN_XVSQRTSP
:
21356 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode
))
21359 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_4SF
];
21366 /* Load up a constant. If the mode is a vector mode, splat the value across
21367 all of the vector elements. */
21370 rs6000_load_constant_and_splat (machine_mode mode
, REAL_VALUE_TYPE dconst
)
21374 if (mode
== SFmode
|| mode
== DFmode
)
21376 rtx d
= const_double_from_real_value (dconst
, mode
);
21377 reg
= force_reg (mode
, d
);
21379 else if (mode
== V4SFmode
)
21381 rtx d
= const_double_from_real_value (dconst
, SFmode
);
21382 rtvec v
= gen_rtvec (4, d
, d
, d
, d
);
21383 reg
= gen_reg_rtx (mode
);
21384 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
21386 else if (mode
== V2DFmode
)
21388 rtx d
= const_double_from_real_value (dconst
, DFmode
);
21389 rtvec v
= gen_rtvec (2, d
, d
);
21390 reg
= gen_reg_rtx (mode
);
21391 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
21394 gcc_unreachable ();
21399 /* Generate an FMA instruction. */
21402 rs6000_emit_madd (rtx target
, rtx m1
, rtx m2
, rtx a
)
21404 machine_mode mode
= GET_MODE (target
);
21407 dst
= expand_ternary_op (mode
, fma_optab
, m1
, m2
, a
, target
, 0);
21408 gcc_assert (dst
!= NULL
);
21411 emit_move_insn (target
, dst
);
21414 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21417 rs6000_emit_nmsub (rtx dst
, rtx m1
, rtx m2
, rtx a
)
21419 machine_mode mode
= GET_MODE (dst
);
21422 /* This is a tad more complicated, since the fnma_optab is for
21423 a different expression: fma(-m1, m2, a), which is the same
21424 thing except in the case of signed zeros.
21426 Fortunately we know that if FMA is supported that FNMSUB is
21427 also supported in the ISA. Just expand it directly. */
21429 gcc_assert (optab_handler (fma_optab
, mode
) != CODE_FOR_nothing
);
21431 r
= gen_rtx_NEG (mode
, a
);
21432 r
= gen_rtx_FMA (mode
, m1
, m2
, r
);
21433 r
= gen_rtx_NEG (mode
, r
);
21434 emit_insn (gen_rtx_SET (dst
, r
));
21437 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21438 add a reg_note saying that this was a division. Support both scalar and
21439 vector divide. Assumes no trapping math and finite arguments. */
21442 rs6000_emit_swdiv (rtx dst
, rtx n
, rtx d
, bool note_p
)
21444 machine_mode mode
= GET_MODE (dst
);
21445 rtx one
, x0
, e0
, x1
, xprev
, eprev
, xnext
, enext
, u
, v
;
21448 /* Low precision estimates guarantee 5 bits of accuracy. High
21449 precision estimates guarantee 14 bits of accuracy. SFmode
21450 requires 23 bits of accuracy. DFmode requires 52 bits of
21451 accuracy. Each pass at least doubles the accuracy, leading
21452 to the following. */
21453 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
21454 if (mode
== DFmode
|| mode
== V2DFmode
)
21457 enum insn_code code
= optab_handler (smul_optab
, mode
);
21458 insn_gen_fn gen_mul
= GEN_FCN (code
);
21460 gcc_assert (code
!= CODE_FOR_nothing
);
21462 one
= rs6000_load_constant_and_splat (mode
, dconst1
);
21464 /* x0 = 1./d estimate */
21465 x0
= gen_reg_rtx (mode
);
21466 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, d
),
21469 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21472 /* e0 = 1. - d * x0 */
21473 e0
= gen_reg_rtx (mode
);
21474 rs6000_emit_nmsub (e0
, d
, x0
, one
);
21476 /* x1 = x0 + e0 * x0 */
21477 x1
= gen_reg_rtx (mode
);
21478 rs6000_emit_madd (x1
, e0
, x0
, x0
);
21480 for (i
= 0, xprev
= x1
, eprev
= e0
; i
< passes
- 2;
21481 ++i
, xprev
= xnext
, eprev
= enext
) {
21483 /* enext = eprev * eprev */
21484 enext
= gen_reg_rtx (mode
);
21485 emit_insn (gen_mul (enext
, eprev
, eprev
));
21487 /* xnext = xprev + enext * xprev */
21488 xnext
= gen_reg_rtx (mode
);
21489 rs6000_emit_madd (xnext
, enext
, xprev
, xprev
);
21495 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21497 /* u = n * xprev */
21498 u
= gen_reg_rtx (mode
);
21499 emit_insn (gen_mul (u
, n
, xprev
));
21501 /* v = n - (d * u) */
21502 v
= gen_reg_rtx (mode
);
21503 rs6000_emit_nmsub (v
, d
, u
, n
);
21505 /* dst = (v * xprev) + u */
21506 rs6000_emit_madd (dst
, v
, xprev
, u
);
21509 add_reg_note (get_last_insn (), REG_EQUAL
, gen_rtx_DIV (mode
, n
, d
));
21512 /* Goldschmidt's Algorithm for single/double-precision floating point
21513 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
21516 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
21518 machine_mode mode
= GET_MODE (src
);
21519 rtx e
= gen_reg_rtx (mode
);
21520 rtx g
= gen_reg_rtx (mode
);
21521 rtx h
= gen_reg_rtx (mode
);
21523 /* Low precision estimates guarantee 5 bits of accuracy. High
21524 precision estimates guarantee 14 bits of accuracy. SFmode
21525 requires 23 bits of accuracy. DFmode requires 52 bits of
21526 accuracy. Each pass at least doubles the accuracy, leading
21527 to the following. */
21528 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
21529 if (mode
== DFmode
|| mode
== V2DFmode
)
21534 enum insn_code code
= optab_handler (smul_optab
, mode
);
21535 insn_gen_fn gen_mul
= GEN_FCN (code
);
21537 gcc_assert (code
!= CODE_FOR_nothing
);
21539 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
21541 /* e = rsqrt estimate */
21542 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
21545 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
21548 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
21550 if (mode
== SFmode
)
21552 rtx target
= emit_conditional_move (e
, GT
, src
, zero
, mode
,
21555 emit_move_insn (e
, target
);
21559 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
21560 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
21564 /* g = sqrt estimate. */
21565 emit_insn (gen_mul (g
, e
, src
));
21566 /* h = 1/(2*sqrt) estimate. */
21567 emit_insn (gen_mul (h
, e
, mhalf
));
21573 rtx t
= gen_reg_rtx (mode
);
21574 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
21575 /* Apply correction directly to 1/rsqrt estimate. */
21576 rs6000_emit_madd (dst
, e
, t
, e
);
21580 for (i
= 0; i
< passes
; i
++)
21582 rtx t1
= gen_reg_rtx (mode
);
21583 rtx g1
= gen_reg_rtx (mode
);
21584 rtx h1
= gen_reg_rtx (mode
);
21586 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
21587 rs6000_emit_madd (g1
, g
, t1
, g
);
21588 rs6000_emit_madd (h1
, h
, t1
, h
);
21593 /* Multiply by 2 for 1/rsqrt. */
21594 emit_insn (gen_add3_insn (dst
, h
, h
));
21599 rtx t
= gen_reg_rtx (mode
);
21600 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
21601 rs6000_emit_madd (dst
, g
, t
, g
);
21607 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21608 (Power7) targets. DST is the target, and SRC is the argument operand. */
21611 rs6000_emit_popcount (rtx dst
, rtx src
)
21613 machine_mode mode
= GET_MODE (dst
);
21616 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21617 if (TARGET_POPCNTD
)
21619 if (mode
== SImode
)
21620 emit_insn (gen_popcntdsi2 (dst
, src
));
21622 emit_insn (gen_popcntddi2 (dst
, src
));
21626 tmp1
= gen_reg_rtx (mode
);
21628 if (mode
== SImode
)
21630 emit_insn (gen_popcntbsi2 (tmp1
, src
));
21631 tmp2
= expand_mult (SImode
, tmp1
, GEN_INT (0x01010101),
21633 tmp2
= force_reg (SImode
, tmp2
);
21634 emit_insn (gen_lshrsi3 (dst
, tmp2
, GEN_INT (24)));
21638 emit_insn (gen_popcntbdi2 (tmp1
, src
));
21639 tmp2
= expand_mult (DImode
, tmp1
,
21640 GEN_INT ((HOST_WIDE_INT
)
21641 0x01010101 << 32 | 0x01010101),
21643 tmp2
= force_reg (DImode
, tmp2
);
21644 emit_insn (gen_lshrdi3 (dst
, tmp2
, GEN_INT (56)));
21649 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
21650 target, and SRC is the argument operand. */
21653 rs6000_emit_parity (rtx dst
, rtx src
)
21655 machine_mode mode
= GET_MODE (dst
);
21658 tmp
= gen_reg_rtx (mode
);
21660 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
21663 if (mode
== SImode
)
21665 emit_insn (gen_popcntbsi2 (tmp
, src
));
21666 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
21670 emit_insn (gen_popcntbdi2 (tmp
, src
));
21671 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
21676 if (mode
== SImode
)
21678 /* Is mult+shift >= shift+xor+shift+xor? */
21679 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
21681 rtx tmp1
, tmp2
, tmp3
, tmp4
;
21683 tmp1
= gen_reg_rtx (SImode
);
21684 emit_insn (gen_popcntbsi2 (tmp1
, src
));
21686 tmp2
= gen_reg_rtx (SImode
);
21687 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
21688 tmp3
= gen_reg_rtx (SImode
);
21689 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
21691 tmp4
= gen_reg_rtx (SImode
);
21692 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
21693 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
21696 rs6000_emit_popcount (tmp
, src
);
21697 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
21701 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
21702 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
21704 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
21706 tmp1
= gen_reg_rtx (DImode
);
21707 emit_insn (gen_popcntbdi2 (tmp1
, src
));
21709 tmp2
= gen_reg_rtx (DImode
);
21710 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
21711 tmp3
= gen_reg_rtx (DImode
);
21712 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
21714 tmp4
= gen_reg_rtx (DImode
);
21715 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
21716 tmp5
= gen_reg_rtx (DImode
);
21717 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
21719 tmp6
= gen_reg_rtx (DImode
);
21720 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
21721 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
21724 rs6000_emit_popcount (tmp
, src
);
21725 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
21729 /* Expand an Altivec constant permutation for little endian mode.
21730 OP0 and OP1 are the input vectors and TARGET is the output vector.
21731 SEL specifies the constant permutation vector.
21733 There are two issues: First, the two input operands must be
21734 swapped so that together they form a double-wide array in LE
21735 order. Second, the vperm instruction has surprising behavior
21736 in LE mode: it interprets the elements of the source vectors
21737 in BE mode ("left to right") and interprets the elements of
21738 the destination vector in LE mode ("right to left"). To
21739 correct for this, we must subtract each element of the permute
21740 control vector from 31.
21742 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21743 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21744 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21745 serve as the permute control vector. Then, in BE mode,
21749 places the desired result in vr9. However, in LE mode the
21750 vector contents will be
21752 vr10 = 00000003 00000002 00000001 00000000
21753 vr11 = 00000007 00000006 00000005 00000004
21755 The result of the vperm using the same permute control vector is
21757 vr9 = 05000000 07000000 01000000 03000000
21759 That is, the leftmost 4 bytes of vr10 are interpreted as the
21760 source for the rightmost 4 bytes of vr9, and so on.
21762 If we change the permute control vector to
21764 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21772 vr9 = 00000006 00000004 00000002 00000000. */
21775 altivec_expand_vec_perm_const_le (rtx target
, rtx op0
, rtx op1
,
21776 const vec_perm_indices
&sel
)
21780 rtx constv
, unspec
;
21782 /* Unpack and adjust the constant selector. */
21783 for (i
= 0; i
< 16; ++i
)
21785 unsigned int elt
= 31 - (sel
[i
] & 31);
21786 perm
[i
] = GEN_INT (elt
);
21789 /* Expand to a permute, swapping the inputs and using the
21790 adjusted selector. */
21792 op0
= force_reg (V16QImode
, op0
);
21794 op1
= force_reg (V16QImode
, op1
);
21796 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
21797 constv
= force_reg (V16QImode
, constv
);
21798 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
21800 if (!REG_P (target
))
21802 rtx tmp
= gen_reg_rtx (V16QImode
);
21803 emit_move_insn (tmp
, unspec
);
21807 emit_move_insn (target
, unspec
);
21810 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21811 permute control vector. But here it's not a constant, so we must
21812 generate a vector NAND or NOR to do the adjustment. */
21815 altivec_expand_vec_perm_le (rtx operands
[4])
21817 rtx notx
, iorx
, unspec
;
21818 rtx target
= operands
[0];
21819 rtx op0
= operands
[1];
21820 rtx op1
= operands
[2];
21821 rtx sel
= operands
[3];
21823 rtx norreg
= gen_reg_rtx (V16QImode
);
21824 machine_mode mode
= GET_MODE (target
);
21826 /* Get everything in regs so the pattern matches. */
21828 op0
= force_reg (mode
, op0
);
21830 op1
= force_reg (mode
, op1
);
21832 sel
= force_reg (V16QImode
, sel
);
21833 if (!REG_P (target
))
21834 tmp
= gen_reg_rtx (mode
);
21836 if (TARGET_P9_VECTOR
)
21838 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, sel
),
21843 /* Invert the selector with a VNAND if available, else a VNOR.
21844 The VNAND is preferred for future fusion opportunities. */
21845 notx
= gen_rtx_NOT (V16QImode
, sel
);
21846 iorx
= (TARGET_P8_VECTOR
21847 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
21848 : gen_rtx_AND (V16QImode
, notx
, notx
));
21849 emit_insn (gen_rtx_SET (norreg
, iorx
));
21851 /* Permute with operands reversed and adjusted selector. */
21852 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
21856 /* Copy into target, possibly by way of a register. */
21857 if (!REG_P (target
))
21859 emit_move_insn (tmp
, unspec
);
21863 emit_move_insn (target
, unspec
);
21866 /* Expand an Altivec constant permutation. Return true if we match
21867 an efficient implementation; false to fall back to VPERM.
21869 OP0 and OP1 are the input vectors and TARGET is the output vector.
21870 SEL specifies the constant permutation vector. */
21873 altivec_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
,
21874 const vec_perm_indices
&sel
)
21876 struct altivec_perm_insn
{
21877 HOST_WIDE_INT mask
;
21878 enum insn_code impl
;
21879 unsigned char perm
[16];
21881 static const struct altivec_perm_insn patterns
[] = {
21882 { OPTION_MASK_ALTIVEC
, CODE_FOR_altivec_vpkuhum_direct
,
21883 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
21884 { OPTION_MASK_ALTIVEC
, CODE_FOR_altivec_vpkuwum_direct
,
21885 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
21886 { OPTION_MASK_ALTIVEC
,
21887 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct
21888 : CODE_FOR_altivec_vmrglb_direct
),
21889 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
21890 { OPTION_MASK_ALTIVEC
,
21891 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct
21892 : CODE_FOR_altivec_vmrglh_direct
),
21893 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
21894 { OPTION_MASK_ALTIVEC
,
21895 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct
21896 : CODE_FOR_altivec_vmrglw_direct
),
21897 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
21898 { OPTION_MASK_ALTIVEC
,
21899 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct
21900 : CODE_FOR_altivec_vmrghb_direct
),
21901 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
21902 { OPTION_MASK_ALTIVEC
,
21903 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct
21904 : CODE_FOR_altivec_vmrghh_direct
),
21905 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
21906 { OPTION_MASK_ALTIVEC
,
21907 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct
21908 : CODE_FOR_altivec_vmrghw_direct
),
21909 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
21910 { OPTION_MASK_P8_VECTOR
,
21911 (BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgew_v4sf_direct
21912 : CODE_FOR_p8_vmrgow_v4sf_direct
),
21913 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
21914 { OPTION_MASK_P8_VECTOR
,
21915 (BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgow_v4sf_direct
21916 : CODE_FOR_p8_vmrgew_v4sf_direct
),
21917 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
21920 unsigned int i
, j
, elt
, which
;
21921 unsigned char perm
[16];
21925 /* Unpack the constant selector. */
21926 for (i
= which
= 0; i
< 16; ++i
)
21929 which
|= (elt
< 16 ? 1 : 2);
21933 /* Simplify the constant selector based on operands. */
21937 gcc_unreachable ();
21941 if (!rtx_equal_p (op0
, op1
))
21946 for (i
= 0; i
< 16; ++i
)
21958 /* Look for splat patterns. */
21963 for (i
= 0; i
< 16; ++i
)
21964 if (perm
[i
] != elt
)
21968 if (!BYTES_BIG_ENDIAN
)
21970 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
21976 for (i
= 0; i
< 16; i
+= 2)
21977 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
21981 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
21982 x
= gen_reg_rtx (V8HImode
);
21983 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
21985 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
21992 for (i
= 0; i
< 16; i
+= 4)
21994 || perm
[i
+ 1] != elt
+ 1
21995 || perm
[i
+ 2] != elt
+ 2
21996 || perm
[i
+ 3] != elt
+ 3)
22000 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
22001 x
= gen_reg_rtx (V4SImode
);
22002 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
22004 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
22010 /* Look for merge and pack patterns. */
22011 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
22015 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
22018 elt
= patterns
[j
].perm
[0];
22019 if (perm
[0] == elt
)
22021 else if (perm
[0] == elt
+ 16)
22025 for (i
= 1; i
< 16; ++i
)
22027 elt
= patterns
[j
].perm
[i
];
22029 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
22030 else if (one_vec
&& elt
>= 16)
22032 if (perm
[i
] != elt
)
22037 enum insn_code icode
= patterns
[j
].impl
;
22038 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
22039 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
22041 /* For little-endian, don't use vpkuwum and vpkuhum if the
22042 underlying vector type is not V4SI and V8HI, respectively.
22043 For example, using vpkuwum with a V8HI picks up the even
22044 halfwords (BE numbering) when the even halfwords (LE
22045 numbering) are what we need. */
22046 if (!BYTES_BIG_ENDIAN
22047 && icode
== CODE_FOR_altivec_vpkuwum_direct
22049 && GET_MODE (op0
) != V4SImode
)
22051 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
22053 if (!BYTES_BIG_ENDIAN
22054 && icode
== CODE_FOR_altivec_vpkuhum_direct
22056 && GET_MODE (op0
) != V8HImode
)
22058 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
22061 /* For little-endian, the two input operands must be swapped
22062 (or swapped back) to ensure proper right-to-left numbering
22064 if (swapped
^ !BYTES_BIG_ENDIAN
)
22065 std::swap (op0
, op1
);
22066 if (imode
!= V16QImode
)
22068 op0
= gen_lowpart (imode
, op0
);
22069 op1
= gen_lowpart (imode
, op1
);
22071 if (omode
== V16QImode
)
22074 x
= gen_reg_rtx (omode
);
22075 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
22076 if (omode
!= V16QImode
)
22077 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
22082 if (!BYTES_BIG_ENDIAN
)
22084 altivec_expand_vec_perm_const_le (target
, op0
, op1
, sel
);
22091 /* Expand a VSX Permute Doubleword constant permutation.
22092 Return true if we match an efficient implementation. */
22095 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
22096 unsigned char perm0
, unsigned char perm1
)
22100 /* If both selectors come from the same operand, fold to single op. */
22101 if ((perm0
& 2) == (perm1
& 2))
22108 /* If both operands are equal, fold to simpler permutation. */
22109 if (rtx_equal_p (op0
, op1
))
22112 perm1
= (perm1
& 1) + 2;
22114 /* If the first selector comes from the second operand, swap. */
22115 else if (perm0
& 2)
22121 std::swap (op0
, op1
);
22123 /* If the second selector does not come from the second operand, fail. */
22124 else if ((perm1
& 2) == 0)
22128 if (target
!= NULL
)
22130 machine_mode vmode
, dmode
;
22133 vmode
= GET_MODE (target
);
22134 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
22135 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4).require ();
22136 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
22137 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
22138 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
22139 emit_insn (gen_rtx_SET (target
, x
));
22144 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22147 rs6000_vectorize_vec_perm_const (machine_mode vmode
, rtx target
, rtx op0
,
22148 rtx op1
, const vec_perm_indices
&sel
)
22150 bool testing_p
= !target
;
22152 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22153 if (TARGET_ALTIVEC
&& testing_p
)
22156 /* Check for ps_merge* or xxpermdi insns. */
22157 if ((vmode
== V2DFmode
|| vmode
== V2DImode
) && VECTOR_MEM_VSX_P (vmode
))
22161 op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
22162 op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
22164 if (rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, sel
[0], sel
[1]))
22168 if (TARGET_ALTIVEC
)
22170 /* Force the target-independent code to lower to V16QImode. */
22171 if (vmode
!= V16QImode
)
22173 if (altivec_expand_vec_perm_const (target
, op0
, op1
, sel
))
22180 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22181 OP0 and OP1 are the input vectors and TARGET is the output vector.
22182 PERM specifies the constant permutation vector. */
22185 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
22186 machine_mode vmode
, const vec_perm_builder
&perm
)
22188 rtx x
= expand_vec_perm_const (vmode
, op0
, op1
, perm
, BLKmode
, target
);
22190 emit_move_insn (target
, x
);
22193 /* Expand an extract even operation. */
22196 rs6000_expand_extract_even (rtx target
, rtx op0
, rtx op1
)
22198 machine_mode vmode
= GET_MODE (target
);
22199 unsigned i
, nelt
= GET_MODE_NUNITS (vmode
);
22200 vec_perm_builder
perm (nelt
, nelt
, 1);
22202 for (i
= 0; i
< nelt
; i
++)
22203 perm
.quick_push (i
* 2);
22205 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
22208 /* Expand a vector interleave operation. */
22211 rs6000_expand_interleave (rtx target
, rtx op0
, rtx op1
, bool highp
)
22213 machine_mode vmode
= GET_MODE (target
);
22214 unsigned i
, high
, nelt
= GET_MODE_NUNITS (vmode
);
22215 vec_perm_builder
perm (nelt
, nelt
, 1);
22217 high
= (highp
? 0 : nelt
/ 2);
22218 for (i
= 0; i
< nelt
/ 2; i
++)
22220 perm
.quick_push (i
+ high
);
22221 perm
.quick_push (i
+ nelt
+ high
);
22224 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
22227 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
22229 rs6000_scale_v2df (rtx tgt
, rtx src
, int scale
)
22231 HOST_WIDE_INT
hwi_scale (scale
);
22232 REAL_VALUE_TYPE r_pow
;
22233 rtvec v
= rtvec_alloc (2);
22235 rtx scale_vec
= gen_reg_rtx (V2DFmode
);
22236 (void)real_powi (&r_pow
, DFmode
, &dconst2
, hwi_scale
);
22237 elt
= const_double_from_real_value (r_pow
, DFmode
);
22238 RTVEC_ELT (v
, 0) = elt
;
22239 RTVEC_ELT (v
, 1) = elt
;
22240 rs6000_expand_vector_init (scale_vec
, gen_rtx_PARALLEL (V2DFmode
, v
));
22241 emit_insn (gen_mulv2df3 (tgt
, src
, scale_vec
));
22244 /* Return an RTX representing where to find the function value of a
22245 function returning MODE. */
22247 rs6000_complex_function_value (machine_mode mode
)
22249 unsigned int regno
;
22251 machine_mode inner
= GET_MODE_INNER (mode
);
22252 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
22254 if (TARGET_FLOAT128_TYPE
22256 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
22257 regno
= ALTIVEC_ARG_RETURN
;
22259 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
22260 regno
= FP_ARG_RETURN
;
22264 regno
= GP_ARG_RETURN
;
22266 /* 32-bit is OK since it'll go in r3/r4. */
22267 if (TARGET_32BIT
&& inner_bytes
>= 4)
22268 return gen_rtx_REG (mode
, regno
);
22271 if (inner_bytes
>= 8)
22272 return gen_rtx_REG (mode
, regno
);
22274 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
22276 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
22277 GEN_INT (inner_bytes
));
22278 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
22281 /* Return an rtx describing a return value of MODE as a PARALLEL
22282 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22283 stride REG_STRIDE. */
22286 rs6000_parallel_return (machine_mode mode
,
22287 int n_elts
, machine_mode elt_mode
,
22288 unsigned int regno
, unsigned int reg_stride
)
22290 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
22293 for (i
= 0; i
< n_elts
; i
++)
22295 rtx r
= gen_rtx_REG (elt_mode
, regno
);
22296 rtx off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
22297 XVECEXP (par
, 0, i
) = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
22298 regno
+= reg_stride
;
22304 /* Target hook for TARGET_FUNCTION_VALUE.
22306 An integer value is in r3 and a floating-point value is in fp1,
22307 unless -msoft-float. */
22310 rs6000_function_value (const_tree valtype
,
22311 const_tree fn_decl_or_type ATTRIBUTE_UNUSED
,
22312 bool outgoing ATTRIBUTE_UNUSED
)
22315 unsigned int regno
;
22316 machine_mode elt_mode
;
22319 /* Special handling for structs in darwin64. */
22321 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype
), valtype
))
22323 CUMULATIVE_ARGS valcum
;
22327 valcum
.fregno
= FP_ARG_MIN_REG
;
22328 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
22329 /* Do a trial code generation as if this were going to be passed as
22330 an argument; if any part goes in memory, we return NULL. */
22331 valret
= rs6000_darwin64_record_arg (&valcum
, valtype
, true, /* retval= */ true);
22334 /* Otherwise fall through to standard ABI rules. */
22337 mode
= TYPE_MODE (valtype
);
22339 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
22340 if (rs6000_discover_homogeneous_aggregate (mode
, valtype
, &elt_mode
, &n_elts
))
22342 int first_reg
, n_regs
;
22344 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode
))
22346 /* _Decimal128 must use even/odd register pairs. */
22347 first_reg
= (elt_mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
22348 n_regs
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
22352 first_reg
= ALTIVEC_ARG_RETURN
;
22356 return rs6000_parallel_return (mode
, n_elts
, elt_mode
, first_reg
, n_regs
);
22359 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
22360 if (TARGET_32BIT
&& TARGET_POWERPC64
)
22369 int count
= GET_MODE_SIZE (mode
) / 4;
22370 return rs6000_parallel_return (mode
, count
, SImode
, GP_ARG_RETURN
, 1);
22373 if ((INTEGRAL_TYPE_P (valtype
)
22374 && GET_MODE_BITSIZE (mode
) < (TARGET_32BIT
? 32 : 64))
22375 || POINTER_TYPE_P (valtype
))
22376 mode
= TARGET_32BIT
? SImode
: DImode
;
22378 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
22379 /* _Decimal128 must use an even/odd register pair. */
22380 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
22381 else if (SCALAR_FLOAT_TYPE_P (valtype
) && TARGET_HARD_FLOAT
22382 && !FLOAT128_VECTOR_P (mode
))
22383 regno
= FP_ARG_RETURN
;
22384 else if (TREE_CODE (valtype
) == COMPLEX_TYPE
22385 && targetm
.calls
.split_complex_arg
)
22386 return rs6000_complex_function_value (mode
);
22387 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22388 return register is used in both cases, and we won't see V2DImode/V2DFmode
22389 for pure altivec, combine the two cases. */
22390 else if ((TREE_CODE (valtype
) == VECTOR_TYPE
|| FLOAT128_VECTOR_P (mode
))
22391 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
22392 && ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
22393 regno
= ALTIVEC_ARG_RETURN
;
22395 regno
= GP_ARG_RETURN
;
22397 return gen_rtx_REG (mode
, regno
);
22400 /* Define how to find the value returned by a library function
22401 assuming the value has mode MODE. */
22403 rs6000_libcall_value (machine_mode mode
)
22405 unsigned int regno
;
22407 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
22408 if (TARGET_32BIT
&& TARGET_POWERPC64
&& mode
== DImode
)
22409 return rs6000_parallel_return (mode
, 2, SImode
, GP_ARG_RETURN
, 1);
22411 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
22412 /* _Decimal128 must use an even/odd register pair. */
22413 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
22414 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && TARGET_HARD_FLOAT
)
22415 regno
= FP_ARG_RETURN
;
22416 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22417 return register is used in both cases, and we won't see V2DImode/V2DFmode
22418 for pure altivec, combine the two cases. */
22419 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
22420 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
)
22421 regno
= ALTIVEC_ARG_RETURN
;
22422 else if (COMPLEX_MODE_P (mode
) && targetm
.calls
.split_complex_arg
)
22423 return rs6000_complex_function_value (mode
);
22425 regno
= GP_ARG_RETURN
;
22427 return gen_rtx_REG (mode
, regno
);
22430 /* Compute register pressure classes. We implement the target hook to avoid
22431 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22432 lead to incorrect estimates of number of available registers and therefor
22433 increased register pressure/spill. */
22435 rs6000_compute_pressure_classes (enum reg_class
*pressure_classes
)
22440 pressure_classes
[n
++] = GENERAL_REGS
;
22442 pressure_classes
[n
++] = VSX_REGS
;
22445 if (TARGET_ALTIVEC
)
22446 pressure_classes
[n
++] = ALTIVEC_REGS
;
22447 if (TARGET_HARD_FLOAT
)
22448 pressure_classes
[n
++] = FLOAT_REGS
;
22450 pressure_classes
[n
++] = CR_REGS
;
22451 pressure_classes
[n
++] = SPECIAL_REGS
;
22456 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22457 Frame pointer elimination is automatically handled.
22459 For the RS/6000, if frame pointer elimination is being done, we would like
22460 to convert ap into fp, not sp.
22462 We need r30 if -mminimal-toc was specified, and there are constant pool
22466 rs6000_can_eliminate (const int from
, const int to
)
22468 return (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
22469 ? ! frame_pointer_needed
22470 : from
== RS6000_PIC_OFFSET_TABLE_REGNUM
22471 ? ! TARGET_MINIMAL_TOC
|| TARGET_NO_TOC_OR_PCREL
22472 || constant_pool_empty_p ()
22476 /* Define the offset between two registers, FROM to be eliminated and its
22477 replacement TO, at the start of a routine. */
22479 rs6000_initial_elimination_offset (int from
, int to
)
22481 rs6000_stack_t
*info
= rs6000_stack_info ();
22482 HOST_WIDE_INT offset
;
22484 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
22485 offset
= info
->push_p
? 0 : -info
->total_size
;
22486 else if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
22488 offset
= info
->push_p
? 0 : -info
->total_size
;
22489 if (FRAME_GROWS_DOWNWARD
)
22490 offset
+= info
->fixed_size
+ info
->vars_size
+ info
->parm_size
;
22492 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
22493 offset
= FRAME_GROWS_DOWNWARD
22494 ? info
->fixed_size
+ info
->vars_size
+ info
->parm_size
22496 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
22497 offset
= info
->total_size
;
22498 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
22499 offset
= info
->push_p
? info
->total_size
: 0;
22500 else if (from
== RS6000_PIC_OFFSET_TABLE_REGNUM
)
22503 gcc_unreachable ();
22508 /* Fill in sizes of registers used by unwinder. */
22511 rs6000_init_dwarf_reg_sizes_extra (tree address
)
22513 if (TARGET_MACHO
&& ! TARGET_ALTIVEC
)
22516 machine_mode mode
= TYPE_MODE (char_type_node
);
22517 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
22518 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
22519 rtx value
= gen_int_mode (16, mode
);
22521 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
22522 The unwinder still needs to know the size of Altivec registers. */
22524 for (i
= FIRST_ALTIVEC_REGNO
; i
< LAST_ALTIVEC_REGNO
+1; i
++)
22526 int column
= DWARF_REG_TO_UNWIND_COLUMN
22527 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
22528 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
22530 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
22535 /* Map internal gcc register numbers to debug format register numbers.
22536 FORMAT specifies the type of debug register number to use:
22537 0 -- debug information, except for frame-related sections
22538 1 -- DWARF .debug_frame section
22539 2 -- DWARF .eh_frame section */
22542 rs6000_dbx_register_number (unsigned int regno
, unsigned int format
)
22544 /* On some platforms, we use the standard DWARF register
22545 numbering for .debug_info and .debug_frame. */
22546 if ((format
== 0 && write_symbols
== DWARF2_DEBUG
) || format
== 1)
22548 #ifdef RS6000_USE_DWARF_NUMBERING
22551 if (FP_REGNO_P (regno
))
22552 return regno
- FIRST_FPR_REGNO
+ 32;
22553 if (ALTIVEC_REGNO_P (regno
))
22554 return regno
- FIRST_ALTIVEC_REGNO
+ 1124;
22555 if (regno
== LR_REGNO
)
22557 if (regno
== CTR_REGNO
)
22559 if (regno
== CA_REGNO
)
22560 return 101; /* XER */
22561 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
22562 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
22563 The actual code emitted saves the whole of CR, so we map CR2_REGNO
22564 to the DWARF reg for CR. */
22565 if (format
== 1 && regno
== CR2_REGNO
)
22567 if (CR_REGNO_P (regno
))
22568 return regno
- CR0_REGNO
+ 86;
22569 if (regno
== VRSAVE_REGNO
)
22571 if (regno
== VSCR_REGNO
)
22574 /* These do not make much sense. */
22575 if (regno
== FRAME_POINTER_REGNUM
)
22577 if (regno
== ARG_POINTER_REGNUM
)
22582 gcc_unreachable ();
22586 /* We use the GCC 7 (and before) internal number for non-DWARF debug
22587 information, and also for .eh_frame. */
22588 /* Translate the regnos to their numbers in GCC 7 (and before). */
22591 if (FP_REGNO_P (regno
))
22592 return regno
- FIRST_FPR_REGNO
+ 32;
22593 if (ALTIVEC_REGNO_P (regno
))
22594 return regno
- FIRST_ALTIVEC_REGNO
+ 77;
22595 if (regno
== LR_REGNO
)
22597 if (regno
== CTR_REGNO
)
22599 if (regno
== CA_REGNO
)
22600 return 76; /* XER */
22601 if (CR_REGNO_P (regno
))
22602 return regno
- CR0_REGNO
+ 68;
22603 if (regno
== VRSAVE_REGNO
)
22605 if (regno
== VSCR_REGNO
)
22608 if (regno
== FRAME_POINTER_REGNUM
)
22610 if (regno
== ARG_POINTER_REGNUM
)
22615 gcc_unreachable ();
22618 /* target hook eh_return_filter_mode */
22619 static scalar_int_mode
22620 rs6000_eh_return_filter_mode (void)
22622 return TARGET_32BIT
? SImode
: word_mode
;
22625 /* Target hook for translate_mode_attribute. */
22626 static machine_mode
22627 rs6000_translate_mode_attribute (machine_mode mode
)
22629 if ((FLOAT128_IEEE_P (mode
)
22630 && ieee128_float_type_node
== long_double_type_node
)
22631 || (FLOAT128_IBM_P (mode
)
22632 && ibm128_float_type_node
== long_double_type_node
))
22633 return COMPLEX_MODE_P (mode
) ? E_TCmode
: E_TFmode
;
22637 /* Target hook for scalar_mode_supported_p. */
22639 rs6000_scalar_mode_supported_p (scalar_mode mode
)
22641 /* -m32 does not support TImode. This is the default, from
22642 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22643 same ABI as for -m32. But default_scalar_mode_supported_p allows
22644 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22645 for -mpowerpc64. */
22646 if (TARGET_32BIT
&& mode
== TImode
)
22649 if (DECIMAL_FLOAT_MODE_P (mode
))
22650 return default_decimal_float_supported_p ();
22651 else if (TARGET_FLOAT128_TYPE
&& (mode
== KFmode
|| mode
== IFmode
))
22654 return default_scalar_mode_supported_p (mode
);
22657 /* Target hook for vector_mode_supported_p. */
22659 rs6000_vector_mode_supported_p (machine_mode mode
)
22661 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22662 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22664 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
) && !FLOAT128_IEEE_P (mode
))
22671 /* Target hook for floatn_mode. */
22672 static opt_scalar_float_mode
22673 rs6000_floatn_mode (int n
, bool extended
)
22683 if (TARGET_FLOAT128_TYPE
)
22684 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
22686 return opt_scalar_float_mode ();
22689 return opt_scalar_float_mode ();
22692 /* Those are the only valid _FloatNx types. */
22693 gcc_unreachable ();
22707 if (TARGET_FLOAT128_TYPE
)
22708 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
22710 return opt_scalar_float_mode ();
22713 return opt_scalar_float_mode ();
22719 /* Target hook for c_mode_for_suffix. */
22720 static machine_mode
22721 rs6000_c_mode_for_suffix (char suffix
)
22723 if (TARGET_FLOAT128_TYPE
)
22725 if (suffix
== 'q' || suffix
== 'Q')
22726 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
22728 /* At the moment, we are not defining a suffix for IBM extended double.
22729 If/when the default for -mabi=ieeelongdouble is changed, and we want
22730 to support __ibm128 constants in legacy library code, we may need to
22731 re-evalaute this decision. Currently, c-lex.c only supports 'w' and
22732 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22733 __float80 constants. */
22739 /* Target hook for invalid_arg_for_unprototyped_fn. */
22740 static const char *
22741 invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
22743 return (!rs6000_darwin64_abi
22745 && TREE_CODE (TREE_TYPE (val
)) == VECTOR_TYPE
22746 && (funcdecl
== NULL_TREE
22747 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
22748 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
22749 ? N_("AltiVec argument passed to unprototyped function")
22753 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
22754 setup by using __stack_chk_fail_local hidden function instead of
22755 calling __stack_chk_fail directly. Otherwise it is better to call
22756 __stack_chk_fail directly. */
22758 static tree ATTRIBUTE_UNUSED
22759 rs6000_stack_protect_fail (void)
22761 return (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
22762 ? default_hidden_stack_protect_fail ()
22763 : default_external_stack_protect_fail ();
22766 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
22769 static unsigned HOST_WIDE_INT
22770 rs6000_asan_shadow_offset (void)
22772 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_64BIT
? 41 : 29);
22776 /* Mask options that we want to support inside of attribute((target)) and
22777 #pragma GCC target operations. Note, we do not include things like
22778 64/32-bit, endianness, hard/soft floating point, etc. that would have
22779 different calling sequences. */
22781 struct rs6000_opt_mask
{
22782 const char *name
; /* option name */
22783 HOST_WIDE_INT mask
; /* mask to set */
22784 bool invert
; /* invert sense of mask */
22785 bool valid_target
; /* option is a target option */
22788 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
22790 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
22791 { "cmpb", OPTION_MASK_CMPB
, false, true },
22792 { "crypto", OPTION_MASK_CRYPTO
, false, true },
22793 { "direct-move", OPTION_MASK_DIRECT_MOVE
, false, true },
22794 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
22795 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
22797 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, true },
22798 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, true },
22799 { "fprnd", OPTION_MASK_FPRND
, false, true },
22800 { "future", OPTION_MASK_FUTURE
, false, true },
22801 { "hard-dfp", OPTION_MASK_DFP
, false, true },
22802 { "htm", OPTION_MASK_HTM
, false, true },
22803 { "isel", OPTION_MASK_ISEL
, false, true },
22804 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
22805 { "mfpgpr", 0, false, true },
22806 { "modulo", OPTION_MASK_MODULO
, false, true },
22807 { "mulhw", OPTION_MASK_MULHW
, false, true },
22808 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
22809 { "pcrel", OPTION_MASK_PCREL
, false, true },
22810 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
22811 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
22812 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
22813 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
22814 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
22815 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
22816 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
22817 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
22818 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
22819 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
22820 { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR
, false, true },
22821 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
22822 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
22823 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
22824 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
22825 { "string", 0, false, true },
22826 { "update", OPTION_MASK_NO_UPDATE
, true , true },
22827 { "vsx", OPTION_MASK_VSX
, false, true },
22828 #ifdef OPTION_MASK_64BIT
22830 { "aix64", OPTION_MASK_64BIT
, false, false },
22831 { "aix32", OPTION_MASK_64BIT
, true, false },
22833 { "64", OPTION_MASK_64BIT
, false, false },
22834 { "32", OPTION_MASK_64BIT
, true, false },
22837 #ifdef OPTION_MASK_EABI
22838 { "eabi", OPTION_MASK_EABI
, false, false },
22840 #ifdef OPTION_MASK_LITTLE_ENDIAN
22841 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
22842 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
22844 #ifdef OPTION_MASK_RELOCATABLE
22845 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
22847 #ifdef OPTION_MASK_STRICT_ALIGN
22848 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
22850 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
22851 { "string", 0, false, false },
22854 /* Builtin mask mapping for printing the flags. */
22855 static struct rs6000_opt_mask
const rs6000_builtin_mask_names
[] =
22857 { "altivec", RS6000_BTM_ALTIVEC
, false, false },
22858 { "vsx", RS6000_BTM_VSX
, false, false },
22859 { "fre", RS6000_BTM_FRE
, false, false },
22860 { "fres", RS6000_BTM_FRES
, false, false },
22861 { "frsqrte", RS6000_BTM_FRSQRTE
, false, false },
22862 { "frsqrtes", RS6000_BTM_FRSQRTES
, false, false },
22863 { "popcntd", RS6000_BTM_POPCNTD
, false, false },
22864 { "cell", RS6000_BTM_CELL
, false, false },
22865 { "power8-vector", RS6000_BTM_P8_VECTOR
, false, false },
22866 { "power9-vector", RS6000_BTM_P9_VECTOR
, false, false },
22867 { "power9-misc", RS6000_BTM_P9_MISC
, false, false },
22868 { "crypto", RS6000_BTM_CRYPTO
, false, false },
22869 { "htm", RS6000_BTM_HTM
, false, false },
22870 { "hard-dfp", RS6000_BTM_DFP
, false, false },
22871 { "hard-float", RS6000_BTM_HARD_FLOAT
, false, false },
22872 { "long-double-128", RS6000_BTM_LDBL128
, false, false },
22873 { "powerpc64", RS6000_BTM_POWERPC64
, false, false },
22874 { "float128", RS6000_BTM_FLOAT128
, false, false },
22875 { "float128-hw", RS6000_BTM_FLOAT128_HW
,false, false },
/* Option variables that we want to support inside attribute((target)) and
   #pragma GCC target operations.  Each entry maps an option name to the
   offsets of its int-valued flag in the global and saved option
   structures.  */

struct rs6000_opt_var {
  const char *name;		/* option name */
  size_t global_offset;		/* offset of the option in global_options.  */
  size_t target_offset;		/* offset of the option in target options.  */
};
22887 static struct rs6000_opt_var
const rs6000_opt_vars
[] =
22890 offsetof (struct gcc_options
, x_TARGET_FRIZ
),
22891 offsetof (struct cl_target_option
, x_TARGET_FRIZ
), },
22892 { "avoid-indexed-addresses",
22893 offsetof (struct gcc_options
, x_TARGET_AVOID_XFORM
),
22894 offsetof (struct cl_target_option
, x_TARGET_AVOID_XFORM
) },
22896 offsetof (struct gcc_options
, x_rs6000_default_long_calls
),
22897 offsetof (struct cl_target_option
, x_rs6000_default_long_calls
), },
22898 { "optimize-swaps",
22899 offsetof (struct gcc_options
, x_rs6000_optimize_swaps
),
22900 offsetof (struct cl_target_option
, x_rs6000_optimize_swaps
), },
22901 { "allow-movmisalign",
22902 offsetof (struct gcc_options
, x_TARGET_ALLOW_MOVMISALIGN
),
22903 offsetof (struct cl_target_option
, x_TARGET_ALLOW_MOVMISALIGN
), },
22905 offsetof (struct gcc_options
, x_TARGET_SCHED_GROUPS
),
22906 offsetof (struct cl_target_option
, x_TARGET_SCHED_GROUPS
), },
22908 offsetof (struct gcc_options
, x_TARGET_ALWAYS_HINT
),
22909 offsetof (struct cl_target_option
, x_TARGET_ALWAYS_HINT
), },
22910 { "align-branch-targets",
22911 offsetof (struct gcc_options
, x_TARGET_ALIGN_BRANCH_TARGETS
),
22912 offsetof (struct cl_target_option
, x_TARGET_ALIGN_BRANCH_TARGETS
), },
22914 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
22915 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
22917 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
22918 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
22919 { "speculate-indirect-jumps",
22920 offsetof (struct gcc_options
, x_rs6000_speculate_indirect_jumps
),
22921 offsetof (struct cl_target_option
, x_rs6000_speculate_indirect_jumps
), },
22924 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
22925 parsing. Return true if there were no errors. */
22928 rs6000_inner_target_options (tree args
, bool attr_p
)
22932 if (args
== NULL_TREE
)
22935 else if (TREE_CODE (args
) == STRING_CST
)
22937 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
22940 while ((q
= strtok (p
, ",")) != NULL
)
22942 bool error_p
= false;
22943 bool not_valid_p
= false;
22944 const char *cpu_opt
= NULL
;
22947 if (strncmp (q
, "cpu=", 4) == 0)
22949 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
22950 if (cpu_index
>= 0)
22951 rs6000_cpu_index
= cpu_index
;
22958 else if (strncmp (q
, "tune=", 5) == 0)
22960 int tune_index
= rs6000_cpu_name_lookup (q
+5);
22961 if (tune_index
>= 0)
22962 rs6000_tune_index
= tune_index
;
22972 bool invert
= false;
22976 if (strncmp (r
, "no-", 3) == 0)
22982 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
22983 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
22985 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
22987 if (!rs6000_opt_masks
[i
].valid_target
)
22988 not_valid_p
= true;
22992 rs6000_isa_flags_explicit
|= mask
;
22994 /* VSX needs altivec, so -mvsx automagically sets
22995 altivec and disables -mavoid-indexed-addresses. */
22998 if (mask
== OPTION_MASK_VSX
)
23000 mask
|= OPTION_MASK_ALTIVEC
;
23001 TARGET_AVOID_XFORM
= 0;
23005 if (rs6000_opt_masks
[i
].invert
)
23009 rs6000_isa_flags
&= ~mask
;
23011 rs6000_isa_flags
|= mask
;
23016 if (error_p
&& !not_valid_p
)
23018 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
23019 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
23021 size_t j
= rs6000_opt_vars
[i
].global_offset
;
23022 *((int *) ((char *)&global_options
+ j
)) = !invert
;
23024 not_valid_p
= false;
23032 const char *eprefix
, *esuffix
;
23037 eprefix
= "__attribute__((__target__(";
23042 eprefix
= "#pragma GCC target ";
23047 error ("invalid cpu %qs for %s%qs%s", cpu_opt
, eprefix
,
23049 else if (not_valid_p
)
23050 error ("%s%qs%s is not allowed", eprefix
, q
, esuffix
);
23052 error ("%s%qs%s is invalid", eprefix
, q
, esuffix
);
23057 else if (TREE_CODE (args
) == TREE_LIST
)
23061 tree value
= TREE_VALUE (args
);
23064 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
23068 args
= TREE_CHAIN (args
);
23070 while (args
!= NULL_TREE
);
23075 error ("attribute %<target%> argument not a string");
23082 /* Print out the target options as a list for -mdebug=target. */
23085 rs6000_debug_target_options (tree args
, const char *prefix
)
23087 if (args
== NULL_TREE
)
23088 fprintf (stderr
, "%s<NULL>", prefix
);
23090 else if (TREE_CODE (args
) == STRING_CST
)
23092 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
23095 while ((q
= strtok (p
, ",")) != NULL
)
23098 fprintf (stderr
, "%s\"%s\"", prefix
, q
);
23103 else if (TREE_CODE (args
) == TREE_LIST
)
23107 tree value
= TREE_VALUE (args
);
23110 rs6000_debug_target_options (value
, prefix
);
23113 args
= TREE_CHAIN (args
);
23115 while (args
!= NULL_TREE
);
23119 gcc_unreachable ();
23125 /* Hook to validate attribute((target("..."))). */
23128 rs6000_valid_attribute_p (tree fndecl
,
23129 tree
ARG_UNUSED (name
),
23133 struct cl_target_option cur_target
;
23136 tree new_target
, new_optimize
;
23137 tree func_optimize
;
23139 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
23141 if (TARGET_DEBUG_TARGET
)
23143 tree tname
= DECL_NAME (fndecl
);
23144 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
23146 fprintf (stderr
, "function: %.*s\n",
23147 (int) IDENTIFIER_LENGTH (tname
),
23148 IDENTIFIER_POINTER (tname
));
23150 fprintf (stderr
, "function: unknown\n");
23152 fprintf (stderr
, "args:");
23153 rs6000_debug_target_options (args
, " ");
23154 fprintf (stderr
, "\n");
23157 fprintf (stderr
, "flags: 0x%x\n", flags
);
23159 fprintf (stderr
, "--------------------\n");
23162 /* attribute((target("default"))) does nothing, beyond
23163 affecting multi-versioning. */
23164 if (TREE_VALUE (args
)
23165 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
23166 && TREE_CHAIN (args
) == NULL_TREE
23167 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
23170 old_optimize
= build_optimization_node (&global_options
);
23171 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
23173 /* If the function changed the optimization levels as well as setting target
23174 options, start with the optimizations specified. */
23175 if (func_optimize
&& func_optimize
!= old_optimize
)
23176 cl_optimization_restore (&global_options
,
23177 TREE_OPTIMIZATION (func_optimize
));
23179 /* The target attributes may also change some optimization flags, so update
23180 the optimization options if necessary. */
23181 cl_target_option_save (&cur_target
, &global_options
);
23182 rs6000_cpu_index
= rs6000_tune_index
= -1;
23183 ret
= rs6000_inner_target_options (args
, true);
23185 /* Set up any additional state. */
23188 ret
= rs6000_option_override_internal (false);
23189 new_target
= build_target_option_node (&global_options
);
23194 new_optimize
= build_optimization_node (&global_options
);
23201 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
23203 if (old_optimize
!= new_optimize
)
23204 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
23207 cl_target_option_restore (&global_options
, &cur_target
);
23209 if (old_optimize
!= new_optimize
)
23210 cl_optimization_restore (&global_options
,
23211 TREE_OPTIMIZATION (old_optimize
));
23217 /* Hook to validate the current #pragma GCC target and set the state, and
23218 update the macros based on what was changed. If ARGS is NULL, then
23219 POP_TARGET is used to reset the options. */
23222 rs6000_pragma_target_parse (tree args
, tree pop_target
)
23224 tree prev_tree
= build_target_option_node (&global_options
);
23226 struct cl_target_option
*prev_opt
, *cur_opt
;
23227 HOST_WIDE_INT prev_flags
, cur_flags
, diff_flags
;
23228 HOST_WIDE_INT prev_bumask
, cur_bumask
, diff_bumask
;
23230 if (TARGET_DEBUG_TARGET
)
23232 fprintf (stderr
, "\n==================== rs6000_pragma_target_parse\n");
23233 fprintf (stderr
, "args:");
23234 rs6000_debug_target_options (args
, " ");
23235 fprintf (stderr
, "\n");
23239 fprintf (stderr
, "pop_target:\n");
23240 debug_tree (pop_target
);
23243 fprintf (stderr
, "pop_target: <NULL>\n");
23245 fprintf (stderr
, "--------------------\n");
23250 cur_tree
= ((pop_target
)
23252 : target_option_default_node
);
23253 cl_target_option_restore (&global_options
,
23254 TREE_TARGET_OPTION (cur_tree
));
23258 rs6000_cpu_index
= rs6000_tune_index
= -1;
23259 if (!rs6000_inner_target_options (args
, false)
23260 || !rs6000_option_override_internal (false)
23261 || (cur_tree
= build_target_option_node (&global_options
))
23264 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
23265 fprintf (stderr
, "invalid pragma\n");
23271 target_option_current_node
= cur_tree
;
23272 rs6000_activate_target_options (target_option_current_node
);
23274 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23275 change the macros that are defined. */
23276 if (rs6000_target_modify_macros_ptr
)
23278 prev_opt
= TREE_TARGET_OPTION (prev_tree
);
23279 prev_bumask
= prev_opt
->x_rs6000_builtin_mask
;
23280 prev_flags
= prev_opt
->x_rs6000_isa_flags
;
23282 cur_opt
= TREE_TARGET_OPTION (cur_tree
);
23283 cur_flags
= cur_opt
->x_rs6000_isa_flags
;
23284 cur_bumask
= cur_opt
->x_rs6000_builtin_mask
;
23286 diff_bumask
= (prev_bumask
^ cur_bumask
);
23287 diff_flags
= (prev_flags
^ cur_flags
);
23289 if ((diff_flags
!= 0) || (diff_bumask
!= 0))
23291 /* Delete old macros. */
23292 rs6000_target_modify_macros_ptr (false,
23293 prev_flags
& diff_flags
,
23294 prev_bumask
& diff_bumask
);
23296 /* Define new macros. */
23297 rs6000_target_modify_macros_ptr (true,
23298 cur_flags
& diff_flags
,
23299 cur_bumask
& diff_bumask
);
23307 /* Remember the last target of rs6000_set_current_function. */
23308 static GTY(()) tree rs6000_previous_fndecl
;
23310 /* Restore target's globals from NEW_TREE and invalidate the
23311 rs6000_previous_fndecl cache. */
23314 rs6000_activate_target_options (tree new_tree
)
23316 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
23317 if (TREE_TARGET_GLOBALS (new_tree
))
23318 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
23319 else if (new_tree
== target_option_default_node
)
23320 restore_target_globals (&default_target_globals
);
23322 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
23323 rs6000_previous_fndecl
= NULL_TREE
;
23326 /* Establish appropriate back-end context for processing the function
23327 FNDECL. The argument might be NULL to indicate processing at top
23328 level, outside of any function scope. */
23330 rs6000_set_current_function (tree fndecl
)
23332 if (TARGET_DEBUG_TARGET
)
23334 fprintf (stderr
, "\n==================== rs6000_set_current_function");
23337 fprintf (stderr
, ", fndecl %s (%p)",
23338 (DECL_NAME (fndecl
)
23339 ? IDENTIFIER_POINTER (DECL_NAME (fndecl
))
23340 : "<unknown>"), (void *)fndecl
);
23342 if (rs6000_previous_fndecl
)
23343 fprintf (stderr
, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl
);
23345 fprintf (stderr
, "\n");
23348 /* Only change the context if the function changes. This hook is called
23349 several times in the course of compiling a function, and we don't want to
23350 slow things down too much or call target_reinit when it isn't safe. */
23351 if (fndecl
== rs6000_previous_fndecl
)
23355 if (rs6000_previous_fndecl
== NULL_TREE
)
23356 old_tree
= target_option_current_node
;
23357 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
))
23358 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
);
23360 old_tree
= target_option_default_node
;
23363 if (fndecl
== NULL_TREE
)
23365 if (old_tree
!= target_option_current_node
)
23366 new_tree
= target_option_current_node
;
23368 new_tree
= NULL_TREE
;
23372 new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
23373 if (new_tree
== NULL_TREE
)
23374 new_tree
= target_option_default_node
;
23377 if (TARGET_DEBUG_TARGET
)
23381 fprintf (stderr
, "\nnew fndecl target specific options:\n");
23382 debug_tree (new_tree
);
23387 fprintf (stderr
, "\nold fndecl target specific options:\n");
23388 debug_tree (old_tree
);
23391 if (old_tree
!= NULL_TREE
|| new_tree
!= NULL_TREE
)
23392 fprintf (stderr
, "--------------------\n");
23395 if (new_tree
&& old_tree
!= new_tree
)
23396 rs6000_activate_target_options (new_tree
);
23399 rs6000_previous_fndecl
= fndecl
;
23403 /* Save the current options */
23406 rs6000_function_specific_save (struct cl_target_option
*ptr
,
23407 struct gcc_options
*opts
)
23409 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
23410 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
23413 /* Restore the current options */
23416 rs6000_function_specific_restore (struct gcc_options
*opts
,
23417 struct cl_target_option
*ptr
)
23420 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
23421 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
23422 (void) rs6000_option_override_internal (false);
23425 /* Print the current options */
23428 rs6000_function_specific_print (FILE *file
, int indent
,
23429 struct cl_target_option
*ptr
)
23431 rs6000_print_isa_options (file
, indent
, "Isa options set",
23432 ptr
->x_rs6000_isa_flags
);
23434 rs6000_print_isa_options (file
, indent
, "Isa options explicit",
23435 ptr
->x_rs6000_isa_flags_explicit
);
23438 /* Helper function to print the current isa or misc options on a line. */
23441 rs6000_print_options_internal (FILE *file
,
23443 const char *string
,
23444 HOST_WIDE_INT flags
,
23445 const char *prefix
,
23446 const struct rs6000_opt_mask
*opts
,
23447 size_t num_elements
)
23450 size_t start_column
= 0;
23452 size_t max_column
= 120;
23453 size_t prefix_len
= strlen (prefix
);
23454 size_t comma_len
= 0;
23455 const char *comma
= "";
23458 start_column
+= fprintf (file
, "%*s", indent
, "");
23462 fprintf (stderr
, DEBUG_FMT_S
, string
, "<none>");
23466 start_column
+= fprintf (stderr
, DEBUG_FMT_WX
, string
, flags
);
23468 /* Print the various mask options. */
23469 cur_column
= start_column
;
23470 for (i
= 0; i
< num_elements
; i
++)
23472 bool invert
= opts
[i
].invert
;
23473 const char *name
= opts
[i
].name
;
23474 const char *no_str
= "";
23475 HOST_WIDE_INT mask
= opts
[i
].mask
;
23476 size_t len
= comma_len
+ prefix_len
+ strlen (name
);
23480 if ((flags
& mask
) == 0)
23483 len
+= sizeof ("no-") - 1;
23491 if ((flags
& mask
) != 0)
23494 len
+= sizeof ("no-") - 1;
23501 if (cur_column
> max_column
)
23503 fprintf (stderr
, ", \\\n%*s", (int)start_column
, "");
23504 cur_column
= start_column
+ len
;
23508 fprintf (file
, "%s%s%s%s", comma
, prefix
, no_str
, name
);
23510 comma_len
= sizeof (", ") - 1;
23513 fputs ("\n", file
);
23516 /* Helper function to print the current isa options on a line. */
23519 rs6000_print_isa_options (FILE *file
, int indent
, const char *string
,
23520 HOST_WIDE_INT flags
)
23522 rs6000_print_options_internal (file
, indent
, string
, flags
, "-m",
23523 &rs6000_opt_masks
[0],
23524 ARRAY_SIZE (rs6000_opt_masks
));
23528 rs6000_print_builtin_options (FILE *file
, int indent
, const char *string
,
23529 HOST_WIDE_INT flags
)
23531 rs6000_print_options_internal (file
, indent
, string
, flags
, "",
23532 &rs6000_builtin_mask_names
[0],
23533 ARRAY_SIZE (rs6000_builtin_mask_names
));
23536 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
23537 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23538 -mupper-regs-df, etc.).
23540 If the user used -mno-power8-vector, we need to turn off all of the implicit
23541 ISA 2.07 and 3.0 options that relate to the vector unit.
23543 If the user used -mno-power9-vector, we need to turn off all of the implicit
23544 ISA 3.0 options that relate to the vector unit.
23546 This function does not handle explicit options such as the user specifying
23547 -mdirect-move. These are handled in rs6000_option_override_internal, and
23548 the appropriate error is given if needed.
23550 We return a mask of all of the implicit options that should not be enabled
23553 static HOST_WIDE_INT
23554 rs6000_disable_incompatible_switches (void)
23556 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
23559 static const struct {
23560 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
23561 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
23562 const char *const name
; /* name of the switch. */
23564 { OPTION_MASK_FUTURE
, OTHER_FUTURE_MASKS
, "future" },
23565 { OPTION_MASK_P9_VECTOR
, OTHER_P9_VECTOR_MASKS
, "power9-vector" },
23566 { OPTION_MASK_P8_VECTOR
, OTHER_P8_VECTOR_MASKS
, "power8-vector" },
23567 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
23570 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
23572 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
23574 if ((rs6000_isa_flags
& no_flag
) == 0
23575 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
23577 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
23578 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
23584 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
23585 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
23587 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
23588 error ("%<-mno-%s%> turns off %<-m%s%>",
23590 rs6000_opt_masks
[j
].name
);
23593 gcc_assert (!set_flags
);
23596 rs6000_isa_flags
&= ~dep_flags
;
23597 ignore_masks
|= no_flag
| dep_flags
;
23601 return ignore_masks
;
23605 /* Helper function for printing the function name when debugging. */
23607 static const char *
23608 get_decl_name (tree fn
)
23615 name
= DECL_NAME (fn
);
23617 return "<no-name>";
23619 return IDENTIFIER_POINTER (name
);
23622 /* Return the clone id of the target we are compiling code for in a target
23623 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23624 the priority list for the target clones (ordered from lowest to
23628 rs6000_clone_priority (tree fndecl
)
23630 tree fn_opts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
23631 HOST_WIDE_INT isa_masks
;
23632 int ret
= CLONE_DEFAULT
;
23633 tree attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (fndecl
));
23634 const char *attrs_str
= NULL
;
23636 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
23637 attrs_str
= TREE_STRING_POINTER (attrs
);
23639 /* Return priority zero for default function. Return the ISA needed for the
23640 function if it is not the default. */
23641 if (strcmp (attrs_str
, "default") != 0)
23643 if (fn_opts
== NULL_TREE
)
23644 fn_opts
= target_option_default_node
;
23646 if (!fn_opts
|| !TREE_TARGET_OPTION (fn_opts
))
23647 isa_masks
= rs6000_isa_flags
;
23649 isa_masks
= TREE_TARGET_OPTION (fn_opts
)->x_rs6000_isa_flags
;
23651 for (ret
= CLONE_MAX
- 1; ret
!= 0; ret
--)
23652 if ((rs6000_clone_map
[ret
].isa_mask
& isa_masks
) != 0)
23656 if (TARGET_DEBUG_TARGET
)
23657 fprintf (stderr
, "rs6000_get_function_version_priority (%s) => %d\n",
23658 get_decl_name (fndecl
), ret
);
23663 /* This compares the priority of target features in function DECL1 and DECL2.
23664 It returns positive value if DECL1 is higher priority, negative value if
23665 DECL2 is higher priority and 0 if they are the same. Note, priorities are
23666 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23669 rs6000_compare_version_priority (tree decl1
, tree decl2
)
23671 int priority1
= rs6000_clone_priority (decl1
);
23672 int priority2
= rs6000_clone_priority (decl2
);
23673 int ret
= priority1
- priority2
;
23675 if (TARGET_DEBUG_TARGET
)
23676 fprintf (stderr
, "rs6000_compare_version_priority (%s, %s) => %d\n",
23677 get_decl_name (decl1
), get_decl_name (decl2
), ret
);
23682 /* Make a dispatcher declaration for the multi-versioned function DECL.
23683 Calls to DECL function will be replaced with calls to the dispatcher
23684 by the front-end. Returns the decl of the dispatcher function. */
23687 rs6000_get_function_versions_dispatcher (void *decl
)
23689 tree fn
= (tree
) decl
;
23690 struct cgraph_node
*node
= NULL
;
23691 struct cgraph_node
*default_node
= NULL
;
23692 struct cgraph_function_version_info
*node_v
= NULL
;
23693 struct cgraph_function_version_info
*first_v
= NULL
;
23695 tree dispatch_decl
= NULL
;
23697 struct cgraph_function_version_info
*default_version_info
= NULL
;
23698 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
23700 if (TARGET_DEBUG_TARGET
)
23701 fprintf (stderr
, "rs6000_get_function_versions_dispatcher (%s)\n",
23702 get_decl_name (fn
));
23704 node
= cgraph_node::get (fn
);
23705 gcc_assert (node
!= NULL
);
23707 node_v
= node
->function_version ();
23708 gcc_assert (node_v
!= NULL
);
23710 if (node_v
->dispatcher_resolver
!= NULL
)
23711 return node_v
->dispatcher_resolver
;
23713 /* Find the default version and make it the first node. */
23715 /* Go to the beginning of the chain. */
23716 while (first_v
->prev
!= NULL
)
23717 first_v
= first_v
->prev
;
23719 default_version_info
= first_v
;
23720 while (default_version_info
!= NULL
)
23722 const tree decl2
= default_version_info
->this_node
->decl
;
23723 if (is_function_default_version (decl2
))
23725 default_version_info
= default_version_info
->next
;
23728 /* If there is no default node, just return NULL. */
23729 if (default_version_info
== NULL
)
23732 /* Make default info the first node. */
23733 if (first_v
!= default_version_info
)
23735 default_version_info
->prev
->next
= default_version_info
->next
;
23736 if (default_version_info
->next
)
23737 default_version_info
->next
->prev
= default_version_info
->prev
;
23738 first_v
->prev
= default_version_info
;
23739 default_version_info
->next
= first_v
;
23740 default_version_info
->prev
= NULL
;
23743 default_node
= default_version_info
->this_node
;
23745 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23746 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
23747 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23748 "exports hardware capability bits");
23751 if (targetm
.has_ifunc_p ())
23753 struct cgraph_function_version_info
*it_v
= NULL
;
23754 struct cgraph_node
*dispatcher_node
= NULL
;
23755 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
23757 /* Right now, the dispatching is done via ifunc. */
23758 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
23760 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
23761 gcc_assert (dispatcher_node
!= NULL
);
23762 dispatcher_node
->dispatcher_function
= 1;
23763 dispatcher_version_info
23764 = dispatcher_node
->insert_new_function_version ();
23765 dispatcher_version_info
->next
= default_version_info
;
23766 dispatcher_node
->definition
= 1;
23768 /* Set the dispatcher for all the versions. */
23769 it_v
= default_version_info
;
23770 while (it_v
!= NULL
)
23772 it_v
->dispatcher_resolver
= dispatch_decl
;
23778 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
23779 "multiversioning needs ifunc which is not supported "
23784 return dispatch_decl
;
23787 /* Make the resolver function decl to dispatch the versions of a multi-
23788 versioned function, DEFAULT_DECL. Create an empty basic block in the
23789 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23793 make_resolver_func (const tree default_decl
,
23794 const tree dispatch_decl
,
23795 basic_block
*empty_bb
)
23797 /* Make the resolver function static. The resolver function returns
23799 tree decl_name
= clone_function_name (default_decl
, "resolver");
23800 const char *resolver_name
= IDENTIFIER_POINTER (decl_name
);
23801 tree type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
23802 tree decl
= build_fn_decl (resolver_name
, type
);
23803 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
23805 DECL_NAME (decl
) = decl_name
;
23806 TREE_USED (decl
) = 1;
23807 DECL_ARTIFICIAL (decl
) = 1;
23808 DECL_IGNORED_P (decl
) = 0;
23809 TREE_PUBLIC (decl
) = 0;
23810 DECL_UNINLINABLE (decl
) = 1;
23812 /* Resolver is not external, body is generated. */
23813 DECL_EXTERNAL (decl
) = 0;
23814 DECL_EXTERNAL (dispatch_decl
) = 0;
23816 DECL_CONTEXT (decl
) = NULL_TREE
;
23817 DECL_INITIAL (decl
) = make_node (BLOCK
);
23818 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
23820 /* Build result decl and add to function_decl. */
23821 tree t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
23822 DECL_CONTEXT (t
) = decl
;
23823 DECL_ARTIFICIAL (t
) = 1;
23824 DECL_IGNORED_P (t
) = 1;
23825 DECL_RESULT (decl
) = t
;
23827 gimplify_function_tree (decl
);
23828 push_cfun (DECL_STRUCT_FUNCTION (decl
));
23829 *empty_bb
= init_lowered_empty_function (decl
, false,
23830 profile_count::uninitialized ());
23832 cgraph_node::add_new_function (decl
, true);
23833 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
23837 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23838 DECL_ATTRIBUTES (dispatch_decl
)
23839 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
23841 cgraph_node::create_same_body_alias (dispatch_decl
, decl
);
23846 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23847 return a pointer to VERSION_DECL if we are running on a machine that
23848 supports the index CLONE_ISA hardware architecture bits. This function will
23849 be called during version dispatch to decide which function version to
23850 execute. It returns the basic block at the end, to which more conditions
23854 add_condition_to_bb (tree function_decl
, tree version_decl
,
23855 int clone_isa
, basic_block new_bb
)
23857 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
23859 gcc_assert (new_bb
!= NULL
);
23860 gimple_seq gseq
= bb_seq (new_bb
);
23863 tree convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
23864 build_fold_addr_expr (version_decl
));
23865 tree result_var
= create_tmp_var (ptr_type_node
);
23866 gimple
*convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
23867 gimple
*return_stmt
= gimple_build_return (result_var
);
23869 if (clone_isa
== CLONE_DEFAULT
)
23871 gimple_seq_add_stmt (&gseq
, convert_stmt
);
23872 gimple_seq_add_stmt (&gseq
, return_stmt
);
23873 set_bb_seq (new_bb
, gseq
);
23874 gimple_set_bb (convert_stmt
, new_bb
);
23875 gimple_set_bb (return_stmt
, new_bb
);
23880 tree bool_zero
= build_int_cst (bool_int_type_node
, 0);
23881 tree cond_var
= create_tmp_var (bool_int_type_node
);
23882 tree predicate_decl
= rs6000_builtin_decls
[(int) RS6000_BUILTIN_CPU_SUPPORTS
];
23883 const char *arg_str
= rs6000_clone_map
[clone_isa
].name
;
23884 tree predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
23885 gimple
*call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
23886 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
23888 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
23889 gimple_set_bb (call_cond_stmt
, new_bb
);
23890 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
23892 gimple
*if_else_stmt
= gimple_build_cond (NE_EXPR
, cond_var
, bool_zero
,
23893 NULL_TREE
, NULL_TREE
);
23894 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
23895 gimple_set_bb (if_else_stmt
, new_bb
);
23896 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
23898 gimple_seq_add_stmt (&gseq
, convert_stmt
);
23899 gimple_seq_add_stmt (&gseq
, return_stmt
);
23900 set_bb_seq (new_bb
, gseq
);
23902 basic_block bb1
= new_bb
;
23903 edge e12
= split_block (bb1
, if_else_stmt
);
23904 basic_block bb2
= e12
->dest
;
23905 e12
->flags
&= ~EDGE_FALLTHRU
;
23906 e12
->flags
|= EDGE_TRUE_VALUE
;
23908 edge e23
= split_block (bb2
, return_stmt
);
23909 gimple_set_bb (convert_stmt
, bb2
);
23910 gimple_set_bb (return_stmt
, bb2
);
23912 basic_block bb3
= e23
->dest
;
23913 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
23916 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
23922 /* This function generates the dispatch function for multi-versioned functions.
23923 DISPATCH_DECL is the function which will contain the dispatch logic.
23924 FNDECLS are the function choices for dispatch, and is a tree chain.
23925 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
23926 code is generated. */
23929 dispatch_function_versions (tree dispatch_decl
,
23931 basic_block
*empty_bb
)
23935 vec
<tree
> *fndecls
;
23936 tree clones
[CLONE_MAX
];
23938 if (TARGET_DEBUG_TARGET
)
23939 fputs ("dispatch_function_versions, top\n", stderr
);
23941 gcc_assert (dispatch_decl
!= NULL
23942 && fndecls_p
!= NULL
23943 && empty_bb
!= NULL
);
23945 /* fndecls_p is actually a vector. */
23946 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
23948 /* At least one more version other than the default. */
23949 gcc_assert (fndecls
->length () >= 2);
23951 /* The first version in the vector is the default decl. */
23952 memset ((void *) clones
, '\0', sizeof (clones
));
23953 clones
[CLONE_DEFAULT
] = (*fndecls
)[0];
23955 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
23956 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
23957 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
23958 recent glibc. If we ever need to call __builtin_cpu_init, we would need
23959 to insert the code here to do the call. */
23961 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
23963 int priority
= rs6000_clone_priority (ele
);
23964 if (!clones
[priority
])
23965 clones
[priority
] = ele
;
23968 for (ix
= CLONE_MAX
- 1; ix
>= 0; ix
--)
23971 if (TARGET_DEBUG_TARGET
)
23972 fprintf (stderr
, "dispatch_function_versions, clone %d, %s\n",
23973 ix
, get_decl_name (clones
[ix
]));
23975 *empty_bb
= add_condition_to_bb (dispatch_decl
, clones
[ix
], ix
,
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
rs6000_generate_version_dispatcher_body (void *node_p)
{
  tree resolver;
  basic_block empty_bb;
  struct cgraph_node *node = (cgraph_node *) node_p;
  struct cgraph_function_version_info *ninfo = node->function_version ();

  /* Only build the resolver once; reuse it on subsequent calls.  */
  if (ninfo->dispatcher_resolver)
    return ninfo->dispatcher_resolver;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  /* The first version in the chain corresponds to the default version.  */
  ninfo->dispatcher_resolver = resolver
    = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
	     get_decl_name (resolver));

  /* Build the resolver body inside its own function context.  */
  push_cfun (DECL_STRUCT_FUNCTION (resolver));
  auto_vec<tree, 2> fn_ver_vec;

  /* Collect every non-default version's decl.  */
  for (struct cgraph_function_version_info *vinfo = ninfo->next;
       vinfo;
       vinfo = vinfo->next)
    {
      struct cgraph_node *version = vinfo->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (version->decl))
	sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (version->decl);
    }

  dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver;
}
/* Hook to determine if one function can safely inline another.  Returns true
   when inlining CALLEE into CALLER is permitted given their target options.  */

static bool
rs6000_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If the callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  else
    {
      HOST_WIDE_INT caller_isa;
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
      HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
      HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;

      /* If the caller has option attributes, then use them.
	 Otherwise, use the command line options.  */
      if (caller_tree)
	caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
      else
	caller_isa = rs6000_isa_flags;

      /* The callee's options must be a subset of the caller's options, i.e.
	 a vsx function may inline an altivec function, but a no-vsx function
	 must not inline a vsx function.  However, for those options that the
	 callee has explicitly enabled or disabled, then we must enforce that
	 the callee's and caller's options match exactly; see PR70010.  */
      if (((caller_isa & callee_isa) == callee_isa)
	  && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
	ret = true;
    }

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
	     get_decl_name (caller), get_decl_name (callee),
	     (ret ? "can" : "cannot"));

  return ret;
}
24080 /* Allocate a stack temp and fixup the address so it meets the particular
24081 memory requirements (either offetable or REG+REG addressing). */
24084 rs6000_allocate_stack_temp (machine_mode mode
,
24085 bool offsettable_p
,
24088 rtx stack
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
24089 rtx addr
= XEXP (stack
, 0);
24090 int strict_p
= reload_completed
;
24092 if (!legitimate_indirect_address_p (addr
, strict_p
))
24095 && !rs6000_legitimate_offset_address_p (mode
, addr
, strict_p
, true))
24096 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
24098 else if (reg_reg_p
&& !legitimate_indexed_address_p (addr
, strict_p
))
24099 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
/* Given a memory reference, if it is not a reg or reg+reg addressing,
   convert to such a form to deal with memory reference instructions
   like STFIWX and LDBRX that only take reg+reg addressing.  */

rtx
rs6000_force_indexed_or_indirect_mem (rtx x)
{
  machine_mode mode = GET_MODE (x);

  gcc_assert (MEM_P (x));
  if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
    {
      rtx addr = XEXP (x, 0);
      /* Auto-increment forms: emit the pointer update explicitly, then
	 address through the plain base register.  */
      if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx reg = XEXP (addr, 0);
	  HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
	  rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
	  gcc_assert (REG_P (reg));
	  emit_insn (gen_add3_insn (reg, reg, size_rtx));
	  addr = reg;
	}
      else if (GET_CODE (addr) == PRE_MODIFY)
	{
	  rtx reg = XEXP (addr, 0);
	  rtx expr = XEXP (addr, 1);
	  gcc_assert (REG_P (reg));
	  gcc_assert (GET_CODE (expr) == PLUS);
	  /* Apply the PRE_MODIFY expression to the base register first.  */
	  emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
	  addr = reg;
	}

      if (GET_CODE (addr) == PLUS)
	{
	  /* Force both operands into registers to get reg+reg form.  */
	  rtx op0 = XEXP (addr, 0);
	  rtx op1 = XEXP (addr, 1);
	  op0 = force_reg (Pmode, op0);
	  op1 = force_reg (Pmode, op1);
	  x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
	}

      else
	/* Anything else: load the whole address into one register.  */
	x = replace_equiv_address (x, force_reg (Pmode, addr));
    }

  return x;
}
24152 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24154 On the RS/6000, all integer constants are acceptable, most won't be valid
24155 for particular insns, though. Only easy FP constants are acceptable. */
24158 rs6000_legitimate_constant_p (machine_mode mode
, rtx x
)
24160 if (TARGET_ELF
&& tls_referenced_p (x
))
24163 if (CONST_DOUBLE_P (x
))
24164 return easy_fp_constant (x
, mode
);
24166 if (GET_CODE (x
) == CONST_VECTOR
)
24167 return easy_vector_constant (x
, mode
);
24173 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24176 chain_already_loaded (rtx_insn
*last
)
24178 for (; last
!= NULL
; last
= PREV_INSN (last
))
24180 if (NONJUMP_INSN_P (last
))
24182 rtx patt
= PATTERN (last
);
24184 if (GET_CODE (patt
) == SET
)
24186 rtx lhs
= XEXP (patt
, 0);
24188 if (REG_P (lhs
) && REGNO (lhs
) == STATIC_CHAIN_REGNUM
)
/* Expand code to perform a call under the AIX or ELFv2 ABI.
   VALUE is the place the result goes (or NULL_RTX for a void call),
   FUNC_DESC is the function descriptor or address to call, TLSARG is the
   TLS argument (if any), and COOKIE holds the CALL_* flag bits.  */

void
rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
  rtx toc_load = NULL_RTX;
  rtx toc_restore = NULL_RTX;
  rtx func_addr;
  rtx abi_reg = NULL_RTX;
  rtx call[4];
  int n_call;
  rtx insn;
  bool is_pltseq_longcall;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  is_pltseq_longcall = false;
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      if (TARGET_PLTSEQ)
	is_pltseq_longcall = true;
    }

  /* Handle indirect calls.  */
  if (!SYMBOL_REF_P (func)
      || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
    {
      if (!rs6000_pcrel_p (cfun))
	{
	  /* Save the TOC into its reserved slot before the call,
	     and prepare to restore it after the call.  */
	  rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
	  rtx stack_toc_unspec
	    = gen_rtx_UNSPEC (Pmode,
			      gen_rtvec (1, stack_toc_offset),
			      UNSPEC_TOCSLOT);
	  toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);

	  /* Can we optimize saving the TOC in the prologue or
	     do we need to do it at every call?  */
	  if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
	    cfun->machine->save_toc_in_prologue = true;
	  else
	    {
	      rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
	      rtx stack_toc_mem
		= gen_frame_mem (Pmode,
				 gen_rtx_PLUS (Pmode, stack_ptr,
					       stack_toc_offset));
	      MEM_VOLATILE_P (stack_toc_mem) = 1;
	      if (is_pltseq_longcall)
		{
		  /* Mark the TOC save with a PLTSEQ unspec so the linker
		     can edit the inline PLT sequence.  */
		  rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
		  rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
		  emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
		}
	      else
		emit_move_insn (stack_toc_mem, toc_reg);
	    }
	}

      if (DEFAULT_ABI == ABI_ELFv2)
	{
	  /* A function pointer in the ELFv2 ABI is just a plain address, but
	     the ABI requires it to be loaded into r12 before the call.  */
	  func_addr = gen_rtx_REG (Pmode, 12);
	  if (!rtx_equal_p (func_addr, func))
	    emit_move_insn (func_addr, func);
	  abi_reg = func_addr;
	  /* Indirect calls via CTR are strongly preferred over indirect
	     calls via LR, so move the address there.  Needed to mark
	     this insn for linker plt sequence editing too.  */
	  func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
	  if (is_pltseq_longcall)
	    {
	      rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
	      rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	      emit_insn (gen_rtx_SET (func_addr, mark_func));
	      v = gen_rtvec (2, func_addr, func_desc);
	      func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	    }
	  else
	    emit_move_insn (func_addr, abi_reg);
	}
      else
	{
	  /* A function pointer under AIX is a pointer to a data area whose
	     first word contains the actual address of the function, whose
	     second word contains a pointer to its TOC, and whose third word
	     contains a value to place in the static chain register (r11).
	     Note that if we load the static chain, our "trampoline" need
	     not have any executable code.  */

	  /* Load up address of the actual function.  */
	  func = force_reg (Pmode, func);
	  func_addr = gen_reg_rtx (Pmode);
	  emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));

	  /* Indirect calls via CTR are strongly preferred over indirect
	     calls via LR, so move the address there.  */
	  rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
	  emit_move_insn (ctr_reg, func_addr);
	  func_addr = ctr_reg;

	  /* Prepare to load the TOC of the called function.  Note that the
	     TOC load must happen immediately before the actual call so
	     that unwinding the TOC registers works correctly.  See the
	     comment in frob_update_context.  */
	  rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
	  rtx func_toc_mem
	    = gen_rtx_MEM (Pmode,
			   gen_rtx_PLUS (Pmode, func,
					 func_toc_offset));
	  toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);

	  /* If we have a static chain, load it up.  But, if the call was
	     originally direct, the 3rd word has not been written since no
	     trampoline has been built, so we ought not to load it, lest we
	     override a static chain value.  */
	  if (!(GET_CODE (func_desc) == SYMBOL_REF
		&& SYMBOL_REF_FUNCTION_P (func_desc))
	      && TARGET_POINTERS_TO_NESTED_FUNCTIONS
	      && !chain_already_loaded (get_current_sequence ()->next->last))
	    {
	      rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
	      rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
	      rtx func_sc_mem
		= gen_rtx_MEM (Pmode,
			       gen_rtx_PLUS (Pmode, func,
					     func_sc_offset));
	      emit_move_insn (sc_reg, func_sc_mem);
	      abi_reg = sc_reg;
	    }
	}
    }
  else
    {
      /* No TOC register needed for calls from PC-relative callers.  */
      if (!rs6000_pcrel_p (cfun))
	/* Direct calls use the TOC: for local calls, the callee will
	   assume the TOC register is set; for non-local calls, the
	   PLT stub needs the TOC register.  */
	abi_reg = toc_reg;
      func_addr = func;
    }

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);
  n_call = 1;

  if (toc_load)
    call[n_call++] = toc_load;
  if (toc_restore)
    call[n_call++] = toc_restore;

  call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
  insn = emit_call_insn (insn);

  /* Mention all registers defined by the ABI to hold information
     as uses in CALL_INSN_FUNCTION_USAGE.  */
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
/* Expand code to perform a sibling call under the AIX or ELFv2 ABI.
   VALUE is the place the result goes (or NULL_RTX), FUNC_DESC is the callee,
   TLSARG is the TLS argument (if any), and COOKIE must carry no CALL_* flag
   bits for a sibcall.  */

void
rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx call[2];
  rtx insn;

  /* Sibcalls never carry longcall (or other) cookie flags here.  */
  gcc_assert (INTVAL (cookie) == 0);

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  /* A sibcall returns directly to our caller.  */
  call[1] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
  insn = emit_call_insn (insn);

  /* Note use of the TOC register.  */
  if (!rs6000_pcrel_p (cfun))
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
	     gen_rtx_REG (Pmode, TOC_REGNUM));
}
/* Expand code to perform a call under the SYSV4 ABI.  VALUE is the place the
   result goes (or NULL_RTX), FUNC_DESC is the callee, TLSARG is the TLS
   argument (if any), and COOKIE holds the CALL_* flag bits.  */

void
rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[4];
  rtx insn;
  rtx abi_reg = NULL_RTX;
  int n;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      /* If the longcall was implemented as an inline PLT call using
	 PLT unspecs then func will be REG:r11.  If not, func will be
	 a pseudo reg.  The inline PLT call sequence supports lazy
	 linking (and longcalls to functions in dlopen'd libraries).
	 The other style of longcalls don't.  The lazy linking entry
	 to the dynamic symbol resolver requires r11 be the function
	 address (as it is for linker generated PLT stubs).  Ensure
	 r11 stays valid to the bctrl by marking r11 used by the call.  */
      if (TARGET_PLTSEQ)
	abi_reg = func;
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect calls via CTR are strongly preferred over indirect
	 calls via LR, so move the address there.  That can't be left
	 to reload because we want to mark every instruction in an
	 inline PLT call sequence with a reloc, enabling the linker to
	 edit the sequence back to a direct call when that makes sense.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      if (abi_reg)
	{
	  /* Mark the move to CTR with a PLTSEQ unspec for linker editing.  */
	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	  emit_insn (gen_rtx_SET (func_addr, mark_func));
	  v = gen_rtvec (2, func_addr, func_desc);
	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	}
      else
	emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  /* Record the cookie so the insn patterns can see the call flags.  */
  call[1] = gen_rtx_USE (VOIDmode, cookie);
  n = 2;
  /* A secure-PLT PIC call to a non-local symbol also needs the GOT/PIC
     register live across the call.  */
  if (TARGET_SECURE_PLT
      && flag_pic
      && GET_CODE (func_addr) == SYMBOL_REF
      && !SYMBOL_REF_LOCAL_P (func_addr))
    call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);

  call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
  insn = emit_call_insn (insn);
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
/* Expand code to perform a sibling call under the SysV4 ABI.  VALUE is the
   place the result goes (or NULL_RTX), FUNC_DESC is the callee, TLSARG is the
   TLS argument (if any), and COOKIE holds the CALL_* flag bits.  */

void
rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[3];
  rtx insn;
  rtx abi_reg = NULL_RTX;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      /* If the longcall was implemented as an inline PLT call using
	 PLT unspecs then func will be REG:r11.  If not, func will be
	 a pseudo reg.  The inline PLT call sequence supports lazy
	 linking (and longcalls to functions in dlopen'd libraries).
	 The other style of longcalls don't.  The lazy linking entry
	 to the dynamic symbol resolver requires r11 be the function
	 address (as it is for linker generated PLT stubs).  Ensure
	 r11 stays valid to the bctr by marking r11 used by the call.  */
      if (TARGET_PLTSEQ)
	abi_reg = func;
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect sibcalls must go via CTR.  That can't be left to
	 reload because we want to mark every instruction in an inline
	 PLT call sequence with a reloc, enabling the linker to edit
	 the sequence back to a direct call when that makes sense.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      if (abi_reg)
	{
	  /* Mark the move to CTR with a PLTSEQ unspec for linker editing.  */
	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	  emit_insn (gen_rtx_SET (func_addr, mark_func));
	  v = gen_rtvec (2, func_addr, func_desc);
	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	}
      else
	emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  /* Record the cookie, and mark this as a sibcall returning directly.  */
  call[1] = gen_rtx_USE (VOIDmode, cookie);
  call[2] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
  insn = emit_call_insn (insn);
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
/* Expand code to perform a call under the Darwin ABI.
   Modulo handling of mlongcall, this is much the same as sysv.
   if/when the longcall optimisation is removed, we could drop this
   code and use the sysv case (taking care to avoid the tls stuff).

   We can use this for sibcalls too, if needed.  SIBCALL selects between the
   two forms; COOKIE carries the CALL_* flags.  */

void
rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
		      rtx cookie, bool sibcall)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[3];
  rtx insn;
  int cookie_val = INTVAL (cookie);
  bool make_island = false;

  /* Handle longcall attributes, there are two cases for Darwin:
     1) Newer linkers are capable of synthesising any branch islands needed.
     2) We need a helper branch island synthesised by the compiler.
     The second case has mostly been retired and we don't use it for m64.
     In fact, it's is an optimisation, we could just indirect as sysv does..
     ... however, backwards compatibility for now.
     If we're going to use this, then we need to keep the CALL_LONG bit set,
     so that we can pick up the special insn form later.  */
  if ((cookie_val & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      /* FIXME: the longcall opt should not hang off this flag, it is most
	 likely incorrect for kernel-mode code-generation.  */
      if (darwin_symbol_stubs && TARGET_32BIT)
	make_island = true; /* Do nothing yet, retain the CALL_LONG flag.  */
      else
	{
	  /* The linker is capable of doing this, but the user explicitly
	     asked for -mlongcall, so we'll do the 'normal' version.  */
	  func = rs6000_longcall_ref (func_desc, NULL_RTX);
	  cookie_val &= ~CALL_LONG; /* Handled, zap it.  */
	}
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect calls via CTR are strongly preferred over indirect
	 calls via LR, and are required for indirect sibcalls, so move
	 the address there.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  /* Record the (possibly adjusted) cookie for the insn patterns.  */
  call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));

  if (sibcall)
    call[2] = simple_return_rtx;
  else
    call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
  insn = emit_call_insn (insn);
  /* Now we have the debug info in the insn, we can set up the branch island
     if we're using one.  */
  if (make_island)
    {
      tree funname = get_identifier (XSTR (func_desc, 0));

      if (no_previous_def (funname))
	{
	  rtx label_rtx = gen_label_rtx ();
	  char *label_buf, temp_buf[256];
	  /* Build an internal label name for the island entry.  */
	  ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
				       CODE_LABEL_NUMBER (label_rtx));
	  /* Strip a leading '*' (the "no prefix" marker) if present.  */
	  label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
	  tree labelname = get_identifier (label_buf);
	  add_compiler_branch_island (labelname, funname,
				      insn_line ((const rtx_insn *) insn));
	}
    }
}
/* Expand a (non-sibling) call for Darwin; thin wrapper around
   rs6000_call_darwin_1 with SIBCALL == false.  */

void
rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
		    rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
{
#if TARGET_MACHO
  rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
#else
  gcc_unreachable();
#endif
}
/* Expand a sibling call for Darwin; thin wrapper around
   rs6000_call_darwin_1 with SIBCALL == true.  */

void
rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
		       rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
{
#if TARGET_MACHO
  rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
#else
  gcc_unreachable();
#endif
}
/* Return whether we should generate PC-relative code for FNDECL.  */

static bool
rs6000_fndecl_pcrel_p (const_tree fndecl)
{
  /* PC-relative code is only ever generated for the ELFv2 ABI.  */
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  struct cl_target_option *opts = target_opts_for_fn (fndecl);

  /* PCREL also requires the medium code model.  */
  return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}
/* Return whether we should generate PC-relative code for *FN.  */

bool
rs6000_pcrel_p (struct function *fn)
{
  /* PC-relative code is only ever generated for the ELFv2 ABI.  */
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  /* Optimize usual case: FN is the current function, so the global ISA
     flags already reflect its target options.  */
  if (fn == cfun)
    return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	    && TARGET_CMODEL == CMODEL_MEDIUM);

  return rs6000_fndecl_pcrel_p (fn->decl);
}
/* Given an address (ADDR), a mode (MODE), and what the format of the
   non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
   for the address.  */

enum insn_form
address_to_insn_form (rtx addr,
		      machine_mode mode,
		      enum non_prefixed_form non_prefixed_format)
{
  /* Single register is easy.  */
  if (REG_P (addr) || SUBREG_P (addr))
    return INSN_FORM_BASE_REG;

  /* If the non prefixed instruction format doesn't support offset addressing,
     make sure only indexed addressing is allowed.

     We special case SDmode so that the register allocator does not try to move
     SDmode through GPR registers, but instead uses the 32-bit integer load and
     store instructions for the floating point registers.  */
  if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
    {
      if (GET_CODE (addr) != PLUS)
	return INSN_FORM_BAD;

      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      if (!REG_P (op0) && !SUBREG_P (op0))
	return INSN_FORM_BAD;

      if (!REG_P (op1) && !SUBREG_P (op1))
	return INSN_FORM_BAD;

      return INSN_FORM_X;
    }

  /* Deal with update forms.  */
  if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
    return INSN_FORM_UPDATE;

  /* Handle PC-relative symbols and labels.  Check for both local and external
     symbols.  Assume labels are always local.  */
  if (TARGET_PCREL)
    {
      if (SYMBOL_REF_P (addr) && !SYMBOL_REF_LOCAL_P (addr))
	return INSN_FORM_PCREL_EXTERNAL;

      if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
	return INSN_FORM_PCREL_LOCAL;
    }

  /* Strip a CONST wrapper to look at the underlying expression.  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  /* Recognize LO_SUM addresses used with TOC and 32-bit addressing.  */
  if (GET_CODE (addr) == LO_SUM)
    return INSN_FORM_LO_SUM;

  /* Everything below must be an offset address of some form.  */
  if (GET_CODE (addr) != PLUS)
    return INSN_FORM_BAD;

  rtx op0 = XEXP (addr, 0);
  rtx op1 = XEXP (addr, 1);

  /* Check for indexed addresses.  */
  if (REG_P (op1) || SUBREG_P (op1))
    {
      if (REG_P (op0) || SUBREG_P (op0))
	return INSN_FORM_X;

      return INSN_FORM_BAD;
    }

  if (!CONST_INT_P (op1))
    return INSN_FORM_BAD;

  HOST_WIDE_INT offset = INTVAL (op1);
  /* Even prefixed instructions only support 34-bit signed offsets.  */
  if (!SIGNED_INTEGER_34BIT_P (offset))
    return INSN_FORM_BAD;

  /* Check for local and external PC-relative addresses.  Labels are always
     local.  */
  if (TARGET_PCREL)
    {
      if (SYMBOL_REF_P (op0) && !SYMBOL_REF_LOCAL_P (op0))
	return INSN_FORM_PCREL_EXTERNAL;

      if (SYMBOL_REF_P (op0) || LABEL_REF_P (op0))
	return INSN_FORM_PCREL_LOCAL;
    }

  /* If it isn't PC-relative, the address must use a base register.  */
  if (!REG_P (op0) && !SUBREG_P (op0))
    return INSN_FORM_BAD;

  /* Large offsets must be prefixed.  */
  if (!SIGNED_INTEGER_16BIT_P (offset))
    {
      if (TARGET_PREFIXED_ADDR)
	return INSN_FORM_PREFIXED_NUMERIC;

      return INSN_FORM_BAD;
    }

  /* We have a 16-bit offset, see what default instruction format to use.  */
  if (non_prefixed_format == NON_PREFIXED_DEFAULT)
    {
      unsigned size = GET_MODE_SIZE (mode);

      /* On 64-bit systems, assume 64-bit integers need to use DS form
	 addresses (for LD/STD).  VSX vectors need to use DQ form addresses
	 (for LXV and STXV).  TImode is problematical in that its normal usage
	 is expected to be GPRs where it wants a DS instruction format, but if
	 it goes into the vector registers, it wants a DQ instruction
	 format.  */
      if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
	non_prefixed_format = NON_PREFIXED_DS;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
	non_prefixed_format = NON_PREFIXED_DQ;

      else
	non_prefixed_format = NON_PREFIXED_D;
    }

  /* Classify the D/DS/DQ-form addresses.  */
  switch (non_prefixed_format)
    {
      /* Instruction format D, all 16 bits are valid.  */
    case NON_PREFIXED_D:
      return INSN_FORM_D;

      /* Instruction format DS, bottom 2 bits must be 0.  */
    case NON_PREFIXED_DS:
      if ((offset & 3) == 0)
	return INSN_FORM_DS;

      else if (TARGET_PREFIXED_ADDR)
	return INSN_FORM_PREFIXED_NUMERIC;

      else
	return INSN_FORM_BAD;

      /* Instruction format DQ, bottom 4 bits must be 0.  */
    case NON_PREFIXED_DQ:
      if ((offset & 15) == 0)
	return INSN_FORM_DQ;

      else if (TARGET_PREFIXED_ADDR)
	return INSN_FORM_PREFIXED_NUMERIC;

      else
	return INSN_FORM_BAD;

    default:
      break;
    }

  return INSN_FORM_BAD;
}
/* Helper function to take a REG and a MODE and turn it into the non-prefixed
   instruction format (D/DS/DQ) used for offset memory.  */

static enum non_prefixed_form
reg_to_non_prefixed (rtx reg, machine_mode mode)
{
  /* If it isn't a register, use the defaults.  */
  if (!REG_P (reg) && !SUBREG_P (reg))
    return NON_PREFIXED_DEFAULT;

  unsigned int r = reg_or_subregno (reg);

  /* If we have a pseudo, use the default instruction format.  */
  if (!HARD_REGISTER_NUM_P (r))
    return NON_PREFIXED_DEFAULT;

  unsigned size = GET_MODE_SIZE (mode);

  /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  */
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_D;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || FLOAT128_VECTOR_P (mode)
		   || mode == TImode || mode == CTImode))
	return NON_PREFIXED_DQ;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  */
  else if (ALTIVEC_REGNO_P (r))
    {
      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_DS;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || FLOAT128_VECTOR_P (mode)
		   || mode == TImode || mode == CTImode))
	return NON_PREFIXED_DQ;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
     otherwise.  Assume that any other register, such as LR, CRs, etc. will go
     through the GPR registers for memory operations.  */
  else if (TARGET_POWERPC64 && size >= 8)
    return NON_PREFIXED_DS;

  return NON_PREFIXED_D;
}
/* Whether a load instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_load_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal load insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx reg = recog_data.operand[0];
  rtx mem = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed load instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  /* LWA uses the DS format instead of the D format that LWZ uses.  */
  enum non_prefixed_form non_prefixed;
  machine_mode reg_mode = GET_MODE (reg);
  machine_mode mem_mode = GET_MODE (mem);

  /* A sign-extending SImode->DImode load is LWA, which is DS-form.  */
  if (mem_mode == SImode && reg_mode == DImode
      && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
    non_prefixed = NON_PREFIXED_DS;

  else
    non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
}
/* Whether a store instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_store_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal store insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx mem = recog_data.operand[0];
  rtx reg = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed store instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  machine_mode mem_mode = GET_MODE (mem);
  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
  return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
}
/* Whether a load immediate or add instruction is a prefixed instruction.  This
   is called from the prefixed attribute processing.  */

bool
prefixed_paddi_p (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return false;

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);

  if (!REG_P (dest) && !SUBREG_P (dest))
    return false;

  /* Is this a load immediate that can't be done with a simple ADDI or
     ADDIS?  */
  if (CONST_INT_P (src))
    return (satisfies_constraint_eI (src)
	    && !satisfies_constraint_I (src)
	    && !satisfies_constraint_L (src));

  /* Is this a PADDI instruction that can't be done with a simple ADDI or
     ADDIS?  */
  if (GET_CODE (src) == PLUS)
    {
      rtx op1 = XEXP (src, 1);

      return (CONST_INT_P (op1)
	      && satisfies_constraint_eI (op1)
	      && !satisfies_constraint_I (op1)
	      && !satisfies_constraint_L (op1));
    }

  /* If not, is it a load of a PC-relative address?  */
  if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
    return false;

  if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
    return false;

  enum insn_form iform = address_to_insn_form (src, Pmode,
					       NON_PREFIXED_DEFAULT);

  /* PC-relative address loads (PLA) are prefixed.  */
  return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
}
/* Whether the next instruction needs a 'p' prefix issued before the
   instruction is printed out.  */
static bool next_insn_prefixed_p;

/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
   outputting the assembler code.  On the PowerPC, we remember if the current
   insn is a prefixed insn where we need to emit a 'p' before the insn.

   In addition, if the insn is part of a PC-relative reference to an external
   label optimization, this is recorded also.  */
void
rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
{
  /* Remember whether INSN is prefixed; rs6000_asm_output_opcode reads this
     flag when the opcode is actually printed.  */
  next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
  return;
}
/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
   We use it to emit a 'p' for prefixed insns that is set in
   FINAL_PRESCAN_INSN.  */
void
rs6000_asm_output_opcode (FILE *stream)
{
  /* Prepend 'p' to the mnemonic when the prescan flagged a prefixed insn.  */
  if (next_insn_prefixed_p)
    fprintf (stream, "p");

  return;
}
/* Adjust the length of an INSN.  LENGTH is the currently-computed length and
   should be adjusted to reflect any required changes.  This macro is used when
   there is some systematic length adjustment required that would be difficult
   to express in the length attribute.

   In the PowerPC, we use this to adjust the length of an instruction if one or
   more prefixed instructions are generated, using the attribute
   num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
   hardware requires that a prefixed instruction does not cross a 64-byte
   boundary.  This means the compiler has to assume the length of the first
   prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
   already set for the non-prefixed instruction, we just need to update for the
   difference.  */
25079 rs6000_adjust_insn_length (rtx_insn
*insn
, int length
)
25081 if (TARGET_PREFIXED_ADDR
&& NONJUMP_INSN_P (insn
))
25083 rtx pattern
= PATTERN (insn
);
25084 if (GET_CODE (pattern
) != USE
&& GET_CODE (pattern
) != CLOBBER
25085 && get_attr_prefixed (insn
) == PREFIXED_YES
)
25087 int num_prefixed
= get_attr_max_prefixed_insns (insn
);
25088 length
+= 4 * (num_prefixed
+ 1);
25096 #ifdef HAVE_GAS_HIDDEN
25097 # define USE_HIDDEN_LINKONCE 1
25099 # define USE_HIDDEN_LINKONCE 0
25102 /* Fills in the label name that should be used for a 476 link stack thunk. */
25105 get_ppc476_thunk_name (char name
[32])
25107 gcc_assert (TARGET_LINK_STACK
);
25109 if (USE_HIDDEN_LINKONCE
)
25110 sprintf (name
, "__ppc476.get_thunk");
25112 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
25115 /* This function emits the simple thunk routine that is used to preserve
25116 the link stack on the 476 cpu. */
25118 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
25120 rs6000_code_end (void)
25125 if (!TARGET_LINK_STACK
)
25128 get_ppc476_thunk_name (name
);
25130 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
25131 build_function_type_list (void_type_node
, NULL_TREE
));
25132 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
25133 NULL_TREE
, void_type_node
);
25134 TREE_PUBLIC (decl
) = 1;
25135 TREE_STATIC (decl
) = 1;
25138 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
25140 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
25141 targetm
.asm_out
.unique_section (decl
, 0);
25142 switch_to_section (get_named_section (decl
, NULL
, 0));
25143 DECL_WEAK (decl
) = 1;
25144 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
25145 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
25146 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
25147 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
25152 switch_to_section (text_section
);
25153 ASM_OUTPUT_LABEL (asm_out_file
, name
);
25156 DECL_INITIAL (decl
) = make_node (BLOCK
);
25157 current_function_decl
= decl
;
25158 allocate_struct_function (decl
, false);
25159 init_function_start (decl
);
25160 first_function_block_is_cold
= false;
25161 /* Make sure unwind info is emitted for the thunk if needed. */
25162 final_start_function (emit_barrier (), asm_out_file
, 1);
25164 fputs ("\tblr\n", asm_out_file
);
25166 final_end_function ();
25167 init_insn_lengths ();
25168 free_after_compilation (cfun
);
25170 current_function_decl
= NULL
;
25173 /* Add r30 to hard reg set if the prologue sets it up and it is not
25174 pic_offset_table_rtx. */
25177 rs6000_set_up_by_prologue (struct hard_reg_set_container
*set
)
25179 if (!TARGET_SINGLE_PIC_BASE
25181 && TARGET_MINIMAL_TOC
25182 && !constant_pool_empty_p ())
25183 add_to_hard_reg_set (&set
->set
, Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
25184 if (cfun
->machine
->split_stack_argp_used
)
25185 add_to_hard_reg_set (&set
->set
, Pmode
, 12);
25187 /* Make sure the hard reg set doesn't include r2, which was possibly added
25188 via PIC_OFFSET_TABLE_REGNUM. */
25190 remove_from_hard_reg_set (&set
->set
, Pmode
, TOC_REGNUM
);
25194 /* Helper function for rs6000_split_logical to emit a logical instruction after
25195 spliting the operation to single GPR registers.
25197 DEST is the destination register.
25198 OP1 and OP2 are the input source registers.
25199 CODE is the base operation (AND, IOR, XOR, NOT).
25200 MODE is the machine mode.
25201 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25202 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25203 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25206 rs6000_split_logical_inner (rtx dest
,
25209 enum rtx_code code
,
25211 bool complement_final_p
,
25212 bool complement_op1_p
,
25213 bool complement_op2_p
)
25217 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25218 if (op2
&& CONST_INT_P (op2
)
25219 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
25220 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
25222 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
25223 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
25225 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25230 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
25234 else if (value
== mask
)
25236 if (!rtx_equal_p (dest
, op1
))
25237 emit_insn (gen_rtx_SET (dest
, op1
));
25242 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25243 into separate ORI/ORIS or XORI/XORIS instrucitons. */
25244 else if (code
== IOR
|| code
== XOR
)
25248 if (!rtx_equal_p (dest
, op1
))
25249 emit_insn (gen_rtx_SET (dest
, op1
));
25255 if (code
== AND
&& mode
== SImode
25256 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
25258 emit_insn (gen_andsi3 (dest
, op1
, op2
));
25262 if (complement_op1_p
)
25263 op1
= gen_rtx_NOT (mode
, op1
);
25265 if (complement_op2_p
)
25266 op2
= gen_rtx_NOT (mode
, op2
);
25268 /* For canonical RTL, if only one arm is inverted it is the first. */
25269 if (!complement_op1_p
&& complement_op2_p
)
25270 std::swap (op1
, op2
);
25272 bool_rtx
= ((code
== NOT
)
25273 ? gen_rtx_NOT (mode
, op1
)
25274 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
25276 if (complement_final_p
)
25277 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
25279 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
25282 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25283 operations are split immediately during RTL generation to allow for more
25284 optimizations of the AND/IOR/XOR.
25286 OPERANDS is an array containing the destination and two input operands.
25287 CODE is the base operation (AND, IOR, XOR, NOT).
25288 MODE is the machine mode.
25289 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25290 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25291 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
25292 CLOBBER_REG is either NULL or a scratch register of type CC to allow
25293 formation of the AND instructions. */
25296 rs6000_split_logical_di (rtx operands
[3],
25297 enum rtx_code code
,
25298 bool complement_final_p
,
25299 bool complement_op1_p
,
25300 bool complement_op2_p
)
25302 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
25303 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
25304 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
25305 enum hi_lo
{ hi
= 0, lo
= 1 };
25306 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
25309 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
25310 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
25311 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
25312 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
25315 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
25318 if (!CONST_INT_P (operands
[2]))
25320 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
25321 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
25325 HOST_WIDE_INT value
= INTVAL (operands
[2]);
25326 HOST_WIDE_INT value_hi_lo
[2];
25328 gcc_assert (!complement_final_p
);
25329 gcc_assert (!complement_op1_p
);
25330 gcc_assert (!complement_op2_p
);
25332 value_hi_lo
[hi
] = value
>> 32;
25333 value_hi_lo
[lo
] = value
& lower_32bits
;
25335 for (i
= 0; i
< 2; i
++)
25337 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
25339 if (sub_value
& sign_bit
)
25340 sub_value
|= upper_32bits
;
25342 op2_hi_lo
[i
] = GEN_INT (sub_value
);
25344 /* If this is an AND instruction, check to see if we need to load
25345 the value in a register. */
25346 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
25347 && !and_operand (op2_hi_lo
[i
], SImode
))
25348 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
25353 for (i
= 0; i
< 2; i
++)
25355 /* Split large IOR/XOR operations. */
25356 if ((code
== IOR
|| code
== XOR
)
25357 && CONST_INT_P (op2_hi_lo
[i
])
25358 && !complement_final_p
25359 && !complement_op1_p
25360 && !complement_op2_p
25361 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
25363 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
25364 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
25365 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
25366 rtx tmp
= gen_reg_rtx (SImode
);
25368 /* Make sure the constant is sign extended. */
25369 if ((hi_16bits
& sign_bit
) != 0)
25370 hi_16bits
|= upper_32bits
;
25372 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
25373 code
, SImode
, false, false, false);
25375 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
25376 code
, SImode
, false, false, false);
25379 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
25380 code
, SImode
, complement_final_p
,
25381 complement_op1_p
, complement_op2_p
);
25387 /* Split the insns that make up boolean operations operating on multiple GPR
25388 registers. The boolean MD patterns ensure that the inputs either are
25389 exactly the same as the output registers, or there is no overlap.
25391 OPERANDS is an array containing the destination and two input operands.
25392 CODE is the base operation (AND, IOR, XOR, NOT).
25393 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25394 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25395 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25398 rs6000_split_logical (rtx operands
[3],
25399 enum rtx_code code
,
25400 bool complement_final_p
,
25401 bool complement_op1_p
,
25402 bool complement_op2_p
)
25404 machine_mode mode
= GET_MODE (operands
[0]);
25405 machine_mode sub_mode
;
25407 int sub_size
, regno0
, regno1
, nregs
, i
;
25409 /* If this is DImode, use the specialized version that can run before
25410 register allocation. */
25411 if (mode
== DImode
&& !TARGET_POWERPC64
)
25413 rs6000_split_logical_di (operands
, code
, complement_final_p
,
25414 complement_op1_p
, complement_op2_p
);
25420 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
25421 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
25422 sub_size
= GET_MODE_SIZE (sub_mode
);
25423 regno0
= REGNO (op0
);
25424 regno1
= REGNO (op1
);
25426 gcc_assert (reload_completed
);
25427 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
25428 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
25430 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
25431 gcc_assert (nregs
> 1);
25433 if (op2
&& REG_P (op2
))
25434 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
25436 for (i
= 0; i
< nregs
; i
++)
25438 int offset
= i
* sub_size
;
25439 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
25440 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
25441 rtx sub_op2
= ((code
== NOT
)
25443 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
25445 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
25446 complement_final_p
, complement_op1_p
,
25454 /* Return true if the peephole2 can combine a load involving a combination of
25455 an addis instruction and a load with an offset that can be fused together on
25459 fusion_gpr_load_p (rtx addis_reg
, /* register set via addis. */
25460 rtx addis_value
, /* addis value. */
25461 rtx target
, /* target register that is loaded. */
25462 rtx mem
) /* bottom part of the memory addr. */
25467 /* Validate arguments. */
25468 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
25471 if (!base_reg_operand (target
, GET_MODE (target
)))
25474 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
25477 /* Allow sign/zero extension. */
25478 if (GET_CODE (mem
) == ZERO_EXTEND
25479 || (GET_CODE (mem
) == SIGN_EXTEND
&& TARGET_P8_FUSION_SIGN
))
25480 mem
= XEXP (mem
, 0);
25485 if (!fusion_gpr_mem_load (mem
, GET_MODE (mem
)))
25488 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
25489 if (GET_CODE (addr
) != PLUS
&& GET_CODE (addr
) != LO_SUM
)
25492 /* Validate that the register used to load the high value is either the
25493 register being loaded, or we can safely replace its use.
25495 This function is only called from the peephole2 pass and we assume that
25496 there are 2 instructions in the peephole (addis and load), so we want to
25497 check if the target register was not used in the memory address and the
25498 register to hold the addis result is dead after the peephole. */
25499 if (REGNO (addis_reg
) != REGNO (target
))
25501 if (reg_mentioned_p (target
, mem
))
25504 if (!peep2_reg_dead_p (2, addis_reg
))
25507 /* If the target register being loaded is the stack pointer, we must
25508 avoid loading any other value into it, even temporarily. */
25509 if (REG_P (target
) && REGNO (target
) == STACK_POINTER_REGNUM
)
25513 base_reg
= XEXP (addr
, 0);
25514 return REGNO (addis_reg
) == REGNO (base_reg
);
25517 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25518 sequence. We adjust the addis register to use the target register. If the
25519 load sign extends, we adjust the code to do the zero extending load, and an
25520 explicit sign extension later since the fusion only covers zero extending
25524 operands[0] register set with addis (to be replaced with target)
25525 operands[1] value set via addis
25526 operands[2] target register being loaded
25527 operands[3] D-form memory reference using operands[0]. */
25530 expand_fusion_gpr_load (rtx
*operands
)
25532 rtx addis_value
= operands
[1];
25533 rtx target
= operands
[2];
25534 rtx orig_mem
= operands
[3];
25535 rtx new_addr
, new_mem
, orig_addr
, offset
;
25536 enum rtx_code plus_or_lo_sum
;
25537 machine_mode target_mode
= GET_MODE (target
);
25538 machine_mode extend_mode
= target_mode
;
25539 machine_mode ptr_mode
= Pmode
;
25540 enum rtx_code extend
= UNKNOWN
;
25542 if (GET_CODE (orig_mem
) == ZERO_EXTEND
25543 || (TARGET_P8_FUSION_SIGN
&& GET_CODE (orig_mem
) == SIGN_EXTEND
))
25545 extend
= GET_CODE (orig_mem
);
25546 orig_mem
= XEXP (orig_mem
, 0);
25547 target_mode
= GET_MODE (orig_mem
);
25550 gcc_assert (MEM_P (orig_mem
));
25552 orig_addr
= XEXP (orig_mem
, 0);
25553 plus_or_lo_sum
= GET_CODE (orig_addr
);
25554 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
25556 offset
= XEXP (orig_addr
, 1);
25557 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
25558 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
25560 if (extend
!= UNKNOWN
)
25561 new_mem
= gen_rtx_fmt_e (ZERO_EXTEND
, extend_mode
, new_mem
);
25563 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
25564 UNSPEC_FUSION_GPR
);
25565 emit_insn (gen_rtx_SET (target
, new_mem
));
25567 if (extend
== SIGN_EXTEND
)
25569 int sub_off
= ((BYTES_BIG_ENDIAN
)
25570 ? GET_MODE_SIZE (extend_mode
) - GET_MODE_SIZE (target_mode
)
25573 = simplify_subreg (target_mode
, target
, extend_mode
, sub_off
);
25575 emit_insn (gen_rtx_SET (target
,
25576 gen_rtx_SIGN_EXTEND (extend_mode
, sign_reg
)));
25582 /* Emit the addis instruction that will be part of a fused instruction
25586 emit_fusion_addis (rtx target
, rtx addis_value
)
25589 const char *addis_str
= NULL
;
25591 /* Emit the addis instruction. */
25592 fuse_ops
[0] = target
;
25593 if (satisfies_constraint_L (addis_value
))
25595 fuse_ops
[1] = addis_value
;
25596 addis_str
= "lis %0,%v1";
25599 else if (GET_CODE (addis_value
) == PLUS
)
25601 rtx op0
= XEXP (addis_value
, 0);
25602 rtx op1
= XEXP (addis_value
, 1);
25604 if (REG_P (op0
) && CONST_INT_P (op1
)
25605 && satisfies_constraint_L (op1
))
25609 addis_str
= "addis %0,%1,%v2";
25613 else if (GET_CODE (addis_value
) == HIGH
)
25615 rtx value
= XEXP (addis_value
, 0);
25616 if (GET_CODE (value
) == UNSPEC
&& XINT (value
, 1) == UNSPEC_TOCREL
)
25618 fuse_ops
[1] = XVECEXP (value
, 0, 0); /* symbol ref. */
25619 fuse_ops
[2] = XVECEXP (value
, 0, 1); /* TOC register. */
25621 addis_str
= "addis %0,%2,%1@toc@ha";
25623 else if (TARGET_XCOFF
)
25624 addis_str
= "addis %0,%1@u(%2)";
25627 gcc_unreachable ();
25630 else if (GET_CODE (value
) == PLUS
)
25632 rtx op0
= XEXP (value
, 0);
25633 rtx op1
= XEXP (value
, 1);
25635 if (GET_CODE (op0
) == UNSPEC
25636 && XINT (op0
, 1) == UNSPEC_TOCREL
25637 && CONST_INT_P (op1
))
25639 fuse_ops
[1] = XVECEXP (op0
, 0, 0); /* symbol ref. */
25640 fuse_ops
[2] = XVECEXP (op0
, 0, 1); /* TOC register. */
25643 addis_str
= "addis %0,%2,%1+%3@toc@ha";
25645 else if (TARGET_XCOFF
)
25646 addis_str
= "addis %0,%1+%3@u(%2)";
25649 gcc_unreachable ();
25653 else if (satisfies_constraint_L (value
))
25655 fuse_ops
[1] = value
;
25656 addis_str
= "lis %0,%v1";
25659 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (value
))
25661 fuse_ops
[1] = value
;
25662 addis_str
= "lis %0,%1@ha";
25667 fatal_insn ("Could not generate addis value for fusion", addis_value
);
25669 output_asm_insn (addis_str
, fuse_ops
);
25672 /* Emit a D-form load or store instruction that is the second instruction
25673 of a fusion sequence. */
25676 emit_fusion_load (rtx load_reg
, rtx addis_reg
, rtx offset
, const char *insn_str
)
25679 char insn_template
[80];
25681 fuse_ops
[0] = load_reg
;
25682 fuse_ops
[1] = addis_reg
;
25684 if (CONST_INT_P (offset
) && satisfies_constraint_I (offset
))
25686 sprintf (insn_template
, "%s %%0,%%2(%%1)", insn_str
);
25687 fuse_ops
[2] = offset
;
25688 output_asm_insn (insn_template
, fuse_ops
);
25691 else if (GET_CODE (offset
) == UNSPEC
25692 && XINT (offset
, 1) == UNSPEC_TOCREL
)
25695 sprintf (insn_template
, "%s %%0,%%2@toc@l(%%1)", insn_str
);
25697 else if (TARGET_XCOFF
)
25698 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
25701 gcc_unreachable ();
25703 fuse_ops
[2] = XVECEXP (offset
, 0, 0);
25704 output_asm_insn (insn_template
, fuse_ops
);
25707 else if (GET_CODE (offset
) == PLUS
25708 && GET_CODE (XEXP (offset
, 0)) == UNSPEC
25709 && XINT (XEXP (offset
, 0), 1) == UNSPEC_TOCREL
25710 && CONST_INT_P (XEXP (offset
, 1)))
25712 rtx tocrel_unspec
= XEXP (offset
, 0);
25714 sprintf (insn_template
, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str
);
25716 else if (TARGET_XCOFF
)
25717 sprintf (insn_template
, "%s %%0,%%2+%%3@l(%%1)", insn_str
);
25720 gcc_unreachable ();
25722 fuse_ops
[2] = XVECEXP (tocrel_unspec
, 0, 0);
25723 fuse_ops
[3] = XEXP (offset
, 1);
25724 output_asm_insn (insn_template
, fuse_ops
);
25727 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (offset
))
25729 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
25731 fuse_ops
[2] = offset
;
25732 output_asm_insn (insn_template
, fuse_ops
);
25736 fatal_insn ("Unable to generate load/store offset for fusion", offset
);
25741 /* Given an address, convert it into the addis and load offset parts. Addresses
25742 created during the peephole2 process look like:
25743 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25744 (unspec [(...)] UNSPEC_TOCREL)) */
25747 fusion_split_address (rtx addr
, rtx
*p_hi
, rtx
*p_lo
)
25751 if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
25753 hi
= XEXP (addr
, 0);
25754 lo
= XEXP (addr
, 1);
25757 gcc_unreachable ();
25763 /* Return a string to fuse an addis instruction with a gpr load to the same
25764 register that we loaded up the addis instruction. The address that is used
25765 is the logical address that was formed during peephole2:
25766 (lo_sum (high) (low-part))
25768 The code is complicated, so we call output_asm_insn directly, and just
25772 emit_fusion_gpr_load (rtx target
, rtx mem
)
25777 const char *load_str
= NULL
;
25780 if (GET_CODE (mem
) == ZERO_EXTEND
)
25781 mem
= XEXP (mem
, 0);
25783 gcc_assert (REG_P (target
) && MEM_P (mem
));
25785 addr
= XEXP (mem
, 0);
25786 fusion_split_address (addr
, &addis_value
, &load_offset
);
25788 /* Now emit the load instruction to the same register. */
25789 mode
= GET_MODE (mem
);
25807 gcc_assert (TARGET_POWERPC64
);
25812 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target
, mem
));
25815 /* Emit the addis instruction. */
25816 emit_fusion_addis (target
, addis_value
);
25818 /* Emit the D-form load instruction. */
25819 emit_fusion_load (target
, target
, load_offset
, load_str
);
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl;
static tree atomic_clear_decl;
static tree atomic_update_decl;
#endif
25830 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
25833 rs6000_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
25835 if (!TARGET_HARD_FLOAT
)
25837 #ifdef RS6000_GLIBC_ATOMIC_FENV
25838 if (atomic_hold_decl
== NULL_TREE
)
25841 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
25842 get_identifier ("__atomic_feholdexcept"),
25843 build_function_type_list (void_type_node
,
25844 double_ptr_type_node
,
25846 TREE_PUBLIC (atomic_hold_decl
) = 1;
25847 DECL_EXTERNAL (atomic_hold_decl
) = 1;
25850 if (atomic_clear_decl
== NULL_TREE
)
25853 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
25854 get_identifier ("__atomic_feclearexcept"),
25855 build_function_type_list (void_type_node
,
25857 TREE_PUBLIC (atomic_clear_decl
) = 1;
25858 DECL_EXTERNAL (atomic_clear_decl
) = 1;
25861 tree const_double
= build_qualified_type (double_type_node
,
25863 tree const_double_ptr
= build_pointer_type (const_double
);
25864 if (atomic_update_decl
== NULL_TREE
)
25867 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
25868 get_identifier ("__atomic_feupdateenv"),
25869 build_function_type_list (void_type_node
,
25872 TREE_PUBLIC (atomic_update_decl
) = 1;
25873 DECL_EXTERNAL (atomic_update_decl
) = 1;
25876 tree fenv_var
= create_tmp_var_raw (double_type_node
);
25877 TREE_ADDRESSABLE (fenv_var
) = 1;
25878 tree fenv_addr
= build1 (ADDR_EXPR
, double_ptr_type_node
, fenv_var
);
25880 *hold
= build_call_expr (atomic_hold_decl
, 1, fenv_addr
);
25881 *clear
= build_call_expr (atomic_clear_decl
, 0);
25882 *update
= build_call_expr (atomic_update_decl
, 1,
25883 fold_convert (const_double_ptr
, fenv_addr
));
25888 tree mffs
= rs6000_builtin_decls
[RS6000_BUILTIN_MFFS
];
25889 tree mtfsf
= rs6000_builtin_decls
[RS6000_BUILTIN_MTFSF
];
25890 tree call_mffs
= build_call_expr (mffs
, 0);
25892 /* Generates the equivalent of feholdexcept (&fenv_var)
25894 *fenv_var = __builtin_mffs ();
25896 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
25897 __builtin_mtfsf (0xff, fenv_hold); */
25899 /* Mask to clear everything except for the rounding modes and non-IEEE
25900 arithmetic flag. */
25901 const unsigned HOST_WIDE_INT hold_exception_mask
=
25902 HOST_WIDE_INT_C (0xffffffff00000007);
25904 tree fenv_var
= create_tmp_var_raw (double_type_node
);
25906 tree hold_mffs
= build2 (MODIFY_EXPR
, void_type_node
, fenv_var
, call_mffs
);
25908 tree fenv_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_var
);
25909 tree fenv_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
25910 build_int_cst (uint64_type_node
,
25911 hold_exception_mask
));
25913 tree fenv_hold_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
25916 tree hold_mtfsf
= build_call_expr (mtfsf
, 2,
25917 build_int_cst (unsigned_type_node
, 0xff),
25920 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_mffs
, hold_mtfsf
);
25922 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
25924 double fenv_clear = __builtin_mffs ();
25925 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
25926 __builtin_mtfsf (0xff, fenv_clear); */
25928 /* Mask to clear everything except for the rounding modes and non-IEEE
25929 arithmetic flag. */
25930 const unsigned HOST_WIDE_INT clear_exception_mask
=
25931 HOST_WIDE_INT_C (0xffffffff00000000);
25933 tree fenv_clear
= create_tmp_var_raw (double_type_node
);
25935 tree clear_mffs
= build2 (MODIFY_EXPR
, void_type_node
, fenv_clear
, call_mffs
);
25937 tree fenv_clean_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_clear
);
25938 tree fenv_clear_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
,
25940 build_int_cst (uint64_type_node
,
25941 clear_exception_mask
));
25943 tree fenv_clear_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
25944 fenv_clear_llu_and
);
25946 tree clear_mtfsf
= build_call_expr (mtfsf
, 2,
25947 build_int_cst (unsigned_type_node
, 0xff),
25950 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, clear_mffs
, clear_mtfsf
);
25952 /* Generates the equivalent of feupdateenv (&fenv_var)
25954 double old_fenv = __builtin_mffs ();
25955 double fenv_update;
25956 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
25957 (*(uint64_t*)fenv_var 0x1ff80fff);
25958 __builtin_mtfsf (0xff, fenv_update); */
25960 const unsigned HOST_WIDE_INT update_exception_mask
=
25961 HOST_WIDE_INT_C (0xffffffff1fffff00);
25962 const unsigned HOST_WIDE_INT new_exception_mask
=
25963 HOST_WIDE_INT_C (0x1ff80fff);
25965 tree old_fenv
= create_tmp_var_raw (double_type_node
);
25966 tree update_mffs
= build2 (MODIFY_EXPR
, void_type_node
, old_fenv
, call_mffs
);
25968 tree old_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, old_fenv
);
25969 tree old_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, old_llu
,
25970 build_int_cst (uint64_type_node
,
25971 update_exception_mask
));
25973 tree new_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
25974 build_int_cst (uint64_type_node
,
25975 new_exception_mask
));
25977 tree new_llu_mask
= build2 (BIT_IOR_EXPR
, uint64_type_node
,
25978 old_llu_and
, new_llu_and
);
25980 tree fenv_update_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
25983 tree update_mtfsf
= build_call_expr (mtfsf
, 2,
25984 build_int_cst (unsigned_type_node
, 0xff),
25985 fenv_update_mtfsf
);
25987 *update
= build2 (COMPOUND_EXPR
, void_type_node
, update_mffs
, update_mtfsf
);
25991 rs6000_generate_float2_double_code (rtx dst
, rtx src1
, rtx src2
)
25993 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
25995 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
25996 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
25998 /* The destination of the vmrgew instruction layout is:
25999 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
26000 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26001 vmrgew instruction will be correct. */
26002 if (BYTES_BIG_ENDIAN
)
26004 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0
, src1
, src2
,
26006 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1
, src1
, src2
,
26011 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
26012 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
26015 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
26016 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
26018 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2
, rtx_tmp0
));
26019 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3
, rtx_tmp1
));
26021 if (BYTES_BIG_ENDIAN
)
26022 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
26024 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
26028 rs6000_generate_float2_code (bool signed_convert
, rtx dst
, rtx src1
, rtx src2
)
26030 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
26032 rtx_tmp0
= gen_reg_rtx (V2DImode
);
26033 rtx_tmp1
= gen_reg_rtx (V2DImode
);
26035 /* The destination of the vmrgew instruction layout is:
26036 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
26037 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26038 vmrgew instruction will be correct. */
26039 if (BYTES_BIG_ENDIAN
)
26041 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
26042 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
26046 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
26047 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
26050 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
26051 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
26053 if (signed_convert
)
26055 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2
, rtx_tmp0
));
26056 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3
, rtx_tmp1
));
26060 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2
, rtx_tmp0
));
26061 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3
, rtx_tmp1
));
26064 if (BYTES_BIG_ENDIAN
)
26065 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
26067 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
26071 rs6000_generate_vsigned2_code (bool signed_convert
, rtx dst
, rtx src1
,
26074 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
26076 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
26077 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
26079 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
26080 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
26082 rtx_tmp2
= gen_reg_rtx (V4SImode
);
26083 rtx_tmp3
= gen_reg_rtx (V4SImode
);
26085 if (signed_convert
)
26087 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2
, rtx_tmp0
));
26088 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3
, rtx_tmp1
));
26092 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2
, rtx_tmp0
));
26093 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3
, rtx_tmp1
));
26096 emit_insn (gen_p8_vmrgew_v4si (dst
, rtx_tmp2
, rtx_tmp3
));
26099 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26102 rs6000_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
26103 optimization_type opt_type
)
26108 return (opt_type
== OPTIMIZE_FOR_SPEED
26109 && RS6000_RECIP_AUTO_RSQRTE_P (mode1
));
26116 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26118 static HOST_WIDE_INT
26119 rs6000_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
26121 if (TREE_CODE (exp
) == STRING_CST
26122 && (STRICT_ALIGNMENT
|| !optimize_size
))
26123 return MAX (align
, BITS_PER_WORD
);
26127 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26129 static HOST_WIDE_INT
26130 rs6000_starting_frame_offset (void)
26132 if (FRAME_GROWS_DOWNWARD
)
26134 return RS6000_STARTING_FRAME_OFFSET
;
/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is called
   via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  /* Only C++ mangled names ("_Z" prefix) of functions that pass IEEE 128-bit
     values need the compatibility alias.  */
  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      /* Temporarily re-mangle with the GCC 8.1 scheme to recover the old
	 name, then restore the current assembler name.  */
      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif
26171 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
26172 function names from <foo>l to <foo>f128 if the default long double type is
26173 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26174 include file switches the names on systems that support long double as IEEE
26175 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26176 In the future, glibc will export names like __ieee128_sinf128 and we can
26177 switch to using those instead of using sinf128, which pollutes the user's
26180 This will switch the names for Fortran math functions as well (which doesn't
26181 use math.h). However, Fortran needs other changes to the compiler and
26182 library before you can switch the real*16 type at compile time.
26184 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26185 only do this if the default is that long double is IBM extended double, and
26186 the user asked for IEEE 128-bit. */
26189 rs6000_mangle_decl_assembler_name (tree decl
, tree id
)
26191 if (!TARGET_IEEEQUAD_DEFAULT
&& TARGET_IEEEQUAD
&& TARGET_LONG_DOUBLE_128
26192 && TREE_CODE (decl
) == FUNCTION_DECL
&& DECL_IS_BUILTIN (decl
) )
26194 size_t len
= IDENTIFIER_LENGTH (id
);
26195 const char *name
= IDENTIFIER_POINTER (id
);
26197 if (name
[len
- 1] == 'l')
26199 bool uses_ieee128_p
= false;
26200 tree type
= TREE_TYPE (decl
);
26201 machine_mode ret_mode
= TYPE_MODE (type
);
26203 /* See if the function returns a IEEE 128-bit floating point type or
26205 if (ret_mode
== TFmode
|| ret_mode
== TCmode
)
26206 uses_ieee128_p
= true;
26209 function_args_iterator args_iter
;
26212 /* See if the function passes a IEEE 128-bit floating point type
26213 or complex type. */
26214 FOREACH_FUNCTION_ARGS (type
, arg
, args_iter
)
26216 machine_mode arg_mode
= TYPE_MODE (arg
);
26217 if (arg_mode
== TFmode
|| arg_mode
== TCmode
)
26219 uses_ieee128_p
= true;
26225 /* If we passed or returned an IEEE 128-bit floating point type,
26226 change the name. */
26227 if (uses_ieee128_p
)
26229 char *name2
= (char *) alloca (len
+ 4);
26230 memcpy (name2
, name
, len
- 1);
26231 strcpy (name2
+ len
- 1, "f128");
26232 id
= get_identifier (name2
);
26240 /* Predict whether the given loop in gimple will be transformed in the RTL
26241 doloop_optimize pass. */
26244 rs6000_predict_doloop_p (struct loop
*loop
)
26248 /* On rs6000, targetm.can_use_doloop_p is actually
26249 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26250 if (loop
->inner
!= NULL
)
26252 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
26253 fprintf (dump_file
, "Predict doloop failure due to"
26254 " loop nesting.\n");
26261 struct gcc_target targetm
= TARGET_INITIALIZER
;
26263 #include "gt-rs6000.h"