COSTS_N_INSNS (3), /* SF->DF convert */
};
+/* Instruction costs on POWER9 processors. rs6000_cost is pointed at this
+ table in the PROCESSOR_POWER9 case of the cost-table switch. The
+ initializer is positional, so the entries must stay in the field order of
+ struct processor_costs; the trailing comment on each line names the field
+ that the value fills. */
+static const
+struct processor_costs power9_cost = {
+ COSTS_N_INSNS (3), /* mulsi */
+ COSTS_N_INSNS (3), /* mulsi_const */
+ COSTS_N_INSNS (3), /* mulsi_const9 */
+ COSTS_N_INSNS (3), /* muldi */
+ COSTS_N_INSNS (19), /* divsi */
+ COSTS_N_INSNS (35), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (14), /* sdiv */
+ COSTS_N_INSNS (17), /* ddiv */
+ 128, /* cache line size */
+ 32, /* l1 cache */
+ 256, /* l2 cache */
+ 12, /* prefetch streams */
+ COSTS_N_INSNS (3), /* SF->DF convert */
+};
+
/* Instruction costs on POWER A2 processors. */
static const
struct processor_costs ppca2_cost = {
fprintf (stderr, DEBUG_FMT_S, "lra", "true");
if (TARGET_P8_FUSION)
- fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
- (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
+ {
+ char options[80];
+
+ strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
+ if (TARGET_TOC_FUSION)
+ strcat (options, ", toc");
+
+ if (TARGET_P8_FUSION_SIGN)
+ strcat (options, ", sign");
+
+ fprintf (stderr, DEBUG_FMT_S, "fusion", options);
+ }
fprintf (stderr, DEBUG_FMT_S, "plt-format",
TARGET_SECURE_PLT ? "secure" : "bss");
for (m = 0; m < NUM_MACHINE_MODES; ++m)
{
machine_mode m2 = (machine_mode)m;
+ unsigned short msize = GET_MODE_SIZE (m2);
/* SDmode is special in that we want to access it only via REG+REG
addressing on power7 and above, since we want to use the LFIWZX and
/* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
addressing. Restrict addressing on SPE for 64-bit types
because of the SUBREG hackery used to address 64-bit floats in
- '32-bit' GPRs. */
+ '32-bit' GPRs. If we allow scalars into Altivec registers,
+ don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
if (TARGET_UPDATE
&& (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
- && GET_MODE_SIZE (m2) <= 8
+ && msize <= 8
&& !VECTOR_MODE_P (m2)
&& !FLOAT128_VECTOR_P (m2)
&& !COMPLEX_MODE_P (m2)
- && !indexed_only_p
- && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
+ && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
+ && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
+ && !(TARGET_E500_DOUBLE && msize == 8))
{
addr_mask |= RELOAD_REG_PRE_INCDEC;
/* VMX registers can do (REG & -16) and ((REG+REG) & -16)
addressing on 128-bit types. */
- if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
+ if (rc == RELOAD_REG_VMX && msize == 16
&& (addr_mask & RELOAD_REG_VALID) != 0)
addr_mask |= RELOAD_REG_AND_M16;
if (rs6000_tune_index >= 0)
tune_index = rs6000_tune_index;
else if (have_cpu)
- rs6000_tune_index = tune_index = cpu_index;
+ {
+ /* Until power9 tuning is available, use power8 tuning if -mcpu=power9. */
+ if (processor_target_table[cpu_index].processor != PROCESSOR_POWER9)
+ rs6000_tune_index = tune_index = cpu_index;
+ else
+ {
+ size_t i;
+ tune_index = -1;
+ for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
+ if (processor_target_table[i].processor == PROCESSOR_POWER8)
+ {
+ rs6000_tune_index = tune_index = i;
+ break;
+ }
+ }
+ }
else
{
size_t i;
/* For the newer switches (vsx, dfp, etc.) set some of the older options,
unless the user explicitly used the -mno-<option> to disable the code. */
- if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
+ if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM || TARGET_P9_MINMAX)
+ rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
+ else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
else if (TARGET_VSX)
rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
& OPTION_MASK_P8_FUSION);
+ /* Setting additional fusion flags turns on base fusion. */
+ if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
+ {
+ if (TARGET_P8_FUSION_SIGN)
+ error ("-mpower8-fusion-sign requires -mpower8-fusion");
+
+ if (TARGET_TOC_FUSION)
+ error ("-mtoc-fusion requires -mpower8-fusion");
+
+ rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
+ }
+ else
+ rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
+ }
+
+ /* Power9 fusion is a superset of power8 fusion. */
+ if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
+ {
+ error ("-mpower9-fusion requires -mpower8-fusion");
+ rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
+ }
+ else
+ rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
+ }
+
+ /* Enable power9 fusion if we are tuning for power9, even if we aren't
+ generating power9 instructions. */
+ if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
+ rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
+ & OPTION_MASK_P9_FUSION);
+
/* Power8 does not fuse sign extended loads with the addis. If we are
optimizing at high levels for speed, convert a sign extended load into a
zero extending load, and an explicit sign extension. */
&& optimize >= 3)
rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
+ /* TOC fusion requires 64-bit and medium/large code model. */
+ if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
+ {
+ rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
+ warning (0, N_("-mtoc-fusion requires 64-bit"));
+ }
+
+ if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
+ {
+ rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
+ warning (0, N_("-mtoc-fusion requires medium/large code model"));
+ }
+
+ /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
+ model. */
+ if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
+ && (TARGET_CMODEL != CMODEL_SMALL)
+ && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
+ rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
+
+ /* ISA 3.0 vector instructions include ISA 2.07. Resolve this first: it
+ may turn -mpower9-vector off, and the D-form checks below must see the
+ final setting, otherwise -mpower9-dform could stay enabled without
+ -mpower9-vector. */
+ if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
+ error ("-mpower9-vector requires -mpower8-vector");
+ rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
+ }
+
+ /* ISA 3.0 D-form instructions require p9-vector and upper-regs. In each
+ case only diagnose when the user explicitly set the missing
+ prerequisite; otherwise just turn D-form off quietly. */
+ if (TARGET_P9_DFORM && !TARGET_P9_VECTOR)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
+ error ("-mpower9-dform requires -mpower9-vector");
+ rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
+ }
+
+ if (TARGET_P9_DFORM && !TARGET_UPPER_REGS_DF)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
+ error ("-mpower9-dform requires -mupper-regs-df");
+ rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
+ }
+
+ if (TARGET_P9_DFORM && !TARGET_UPPER_REGS_SF)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
+ error ("-mpower9-dform requires -mupper-regs-sf");
+ rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
+ }
+
/* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
support. If we only have ISA 2.06 support, and the user did not specify
the switch, leave it set to -1 so the movmisalign patterns are enabled,
if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) != 0)
error ("-mfloat128 requires VSX support");
- rs6000_isa_flags &= ~OPTION_MASK_FLOAT128;
+ rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128 | OPTION_MASK_FLOAT128_HW);
+ }
+
+ /* IEEE 128-bit floating point hardware instructions imply enabling
+ __float128. The hardware support needs every one of the prerequisites
+ listed below, so compare the masked flags against the complete mask;
+ comparing against 0 would only reject the case where none of them is
+ enabled. Diagnose only if the user asked for -mfloat128-hardware
+ explicitly; otherwise just turn it off. */
+ if (TARGET_FLOAT128_HW
+ && ((rs6000_isa_flags & (OPTION_MASK_P9_VECTOR
+ | OPTION_MASK_DIRECT_MOVE
+ | OPTION_MASK_UPPER_REGS_DF
+ | OPTION_MASK_UPPER_REGS_SF))
+ != (OPTION_MASK_P9_VECTOR
+ | OPTION_MASK_DIRECT_MOVE
+ | OPTION_MASK_UPPER_REGS_DF
+ | OPTION_MASK_UPPER_REGS_SF)))
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
+ error ("-mfloat128-hardware requires full ISA 3.0 support");
+
+ rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
}
+ else if (TARGET_P9_VECTOR && !TARGET_FLOAT128_HW
+ && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) == 0)
+ rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
+
+ if (TARGET_FLOAT128_HW
+ && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) == 0)
+ rs6000_isa_flags |= OPTION_MASK_FLOAT128;
+
+ /* Print the options after updating the defaults. */
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
&& rs6000_cpu != PROCESSOR_POWER6
&& rs6000_cpu != PROCESSOR_POWER7
&& rs6000_cpu != PROCESSOR_POWER8
+ && rs6000_cpu != PROCESSOR_POWER9
&& rs6000_cpu != PROCESSOR_PPCA2
&& rs6000_cpu != PROCESSOR_CELL
&& rs6000_cpu != PROCESSOR_PPC476);
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER7
- || rs6000_cpu == PROCESSOR_POWER8);
+ || rs6000_cpu == PROCESSOR_POWER8
+ || rs6000_cpu == PROCESSOR_POWER9);
rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER6
|| rs6000_cpu == PROCESSOR_POWER7
|| rs6000_cpu == PROCESSOR_POWER8
+ || rs6000_cpu == PROCESSOR_POWER9
|| rs6000_cpu == PROCESSOR_PPCE500MC
|| rs6000_cpu == PROCESSOR_PPCE500MC64
|| rs6000_cpu == PROCESSOR_PPCE5500
rs6000_cost = &power8_cost;
break;
+ case PROCESSOR_POWER9:
+ rs6000_cost = &power9_cost;
+ break;
+
case PROCESSOR_PPCA2:
rs6000_cost = &ppca2_cost;
break;
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER6
|| rs6000_cpu == PROCESSOR_POWER7
- || rs6000_cpu == PROCESSOR_POWER8))
+ || rs6000_cpu == PROCESSOR_POWER8
+ || rs6000_cpu == PROCESSOR_POWER9))
return 5;
else
return align_loops_log;
|| !global_options_set.x_rs6000_cpu_index)
{
fputs ("\t.machine ", asm_out_file);
- if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
+ if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
+ fputs ("power9\n", asm_out_file);
+ else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
fputs ("power8\n", asm_out_file);
else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
fputs ("power7\n", asm_out_file);
|| rs6000_cpu_attr == CPU_POWER5
|| rs6000_cpu_attr == CPU_POWER7
|| rs6000_cpu_attr == CPU_POWER8
+ || rs6000_cpu_attr == CPU_POWER9
|| rs6000_cpu_attr == CPU_CELL)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
case CPU_POWER7:
return 5;
case CPU_POWER8:
+ case CPU_POWER9:
return 7;
default:
return 1;
}
break;
case PROCESSOR_POWER8:
+ case PROCESSOR_POWER9:
type = get_attr_type (insn);
switch (type)
}
break;
case PROCESSOR_POWER8:
+ case PROCESSOR_POWER9:
type = get_attr_type (insn);
switch (type)
/* Do we have a special group ending nop? */
if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
- || rs6000_cpu_attr == CPU_POWER8)
+ || rs6000_cpu_attr == CPU_POWER8 || rs6000_cpu_attr == CPU_POWER9)
{
nop = gen_group_ending_nop ();
emit_insn_before (nop, next_insn);
expensive than memory in order to bias spills to memory .*/
else if ((rs6000_cpu == PROCESSOR_POWER6
|| rs6000_cpu == PROCESSOR_POWER7
- || rs6000_cpu == PROCESSOR_POWER8)
+ || rs6000_cpu == PROCESSOR_POWER8
+ || rs6000_cpu == PROCESSOR_POWER9)
&& reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
ret = 6 * hard_regno_nregs[0][mode];
{ "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
false, true },
{ "float128", OPTION_MASK_FLOAT128, false, true },
+ { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
{ "fprnd", OPTION_MASK_FPRND, false, true },
{ "hard-dfp", OPTION_MASK_DFP, false, true },
{ "htm", OPTION_MASK_HTM, false, true },
{ "isel", OPTION_MASK_ISEL, false, true },
{ "mfcrf", OPTION_MASK_MFCRF, false, true },
{ "mfpgpr", OPTION_MASK_MFPGPR, false, true },
+ { "modulo", OPTION_MASK_MODULO, false, true },
{ "mulhw", OPTION_MASK_MULHW, false, true },
{ "multiple", OPTION_MASK_MULTIPLE, false, true },
{ "popcntb", OPTION_MASK_POPCNTB, false, true },
{ "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
{ "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
{ "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
+ { "power9-dform", OPTION_MASK_P9_DFORM, false, true },
+ { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
+ { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
+ { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
{ "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
{ "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
{ "string", OPTION_MASK_STRING, false, true },
+ { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
{ "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
{ "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
-mquad-memory-atomic -mno-quad-memory-atomic @gol
-mcompat-align-parm -mno-compat-align-parm @gol
-mupper-regs-df -mno-upper-regs-df -mupper-regs-sf -mno-upper-regs-sf @gol
--mupper-regs -mno-upper-regs @gol
--mfloat128 -mno-float128}
+-mupper-regs -mno-upper-regs -mmodulo -mno-modulo @gol
+-mfloat128 -mno-float128 -mfloat128-hardware -mno-float128-hardware @gol
+-mpower9-fusion -mno-power9-fusion -mpower9-vector -mno-power9-vector}
@emph{RX Options}
@gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol
@samp{e300c3}, @samp{e500mc}, @samp{e500mc64}, @samp{e5500},
@samp{e6500}, @samp{ec603e}, @samp{G3}, @samp{G4}, @samp{G5},
@samp{titan}, @samp{power3}, @samp{power4}, @samp{power5}, @samp{power5+},
-@samp{power6}, @samp{power6x}, @samp{power7}, @samp{power8}, @samp{powerpc},
-@samp{powerpc64}, @samp{powerpc64le}, and @samp{rs64}.
+@samp{power6}, @samp{power6x}, @samp{power7}, @samp{power8},
+@samp{power9}, @samp{powerpc}, @samp{powerpc64}, @samp{powerpc64le},
+and @samp{rs64}.
@option{-mcpu=powerpc}, @option{-mcpu=powerpc64}, and
@option{-mcpu=powerpc64le} specify pure 32-bit PowerPC (either
-mpowerpc-gpopt -mpowerpc-gfxopt -msingle-float -mdouble-float @gol
-msimple-fpu -mstring -mmulhw -mdlmzb -mmfpgpr -mvsx @gol
-mcrypto -mdirect-move -mpower8-fusion -mpower8-vector @gol
--mquad-memory -mquad-memory-atomic}
+-mquad-memory -mquad-memory-atomic -mmodulo -mfloat128 -mfloat128-hardware @gol
+-mpower9-fusion -mpower9-vector}
The particular options set for any particular CPU varies between
compiler versions, depending on what setting seems to produce optimal
@opindex mfloat128
@opindex mno-float128
Enable/disable the @var{__float128} keyword for IEEE 128-bit floating point
-and use software emulation for IEEE 128-bit floating point.
+and use either software emulation for IEEE 128-bit floating point or
+hardware instructions.
The VSX instruction set (@option{-mvsx}, @option{-mcpu=power7}, or
@option{-mcpu=power8}) must be enabled to use the @option{-mfloat128}
option.
+@item -mfloat128-hardware
+@itemx -mno-float128-hardware
+@opindex mfloat128-hardware
+@opindex mno-float128-hardware
+Enable/disable using ISA 3.0 hardware instructions to support the
+@var{__float128} data type.
+
+@item -mmodulo
+@itemx -mno-modulo
+@opindex mmodulo
+@opindex mno-modulo
+Generate code that uses (does not use) the ISA 3.0 integer modulo
+instructions. The @option{-mmodulo} option is enabled by default
+with the @option{-mcpu=power9} option.
+
+@item -mpower9-fusion
+@itemx -mno-power9-fusion
+@opindex mpower9-fusion
+@opindex mno-power9-fusion
+Generate code that keeps (does not keep) some operations adjacent so
+that the instructions can be fused together on power9 and later
+processors.
+
+@item -mpower9-vector
+@itemx -mno-power9-vector
+@opindex mpower9-vector
+@opindex mno-power9-vector
+Generate code that uses (does not use) the vector and scalar
+instructions that were added in version 3.0 of the PowerPC ISA. Also
+enable the use of built-in functions that allow more direct access to
+the vector instructions.
+
@item -mfloat-gprs=@var{yes/single/double/no}
@itemx -mfloat-gprs
@opindex mfloat-gprs