-2017-11-14 Boris Kolpackov <boris@codesynthesis.com>
+2017-11-26 Julia Koval <julia.koval@intel.com>
+
+ * config/i386/i386.c (processor_target_table): Add skylake_cost for
+ skylake-avx512.
+ * config/i386/x86-tune-costs.h (skylake_memcpy, skylake_memset,
+ skylake_cost): New.
+
+2017-11-26 Julia Koval <julia.koval@intel.com>
+
+ * config/i386/driver-i386.c (host_detect_local_cpu):
+ Detect skylake-avx512.
+
+2017-11-26 Julia Koval <julia.koval@intel.com>
+
+ * config.gcc: Add -march=cannonlake.
+ * config/i386/driver-i386.c (host_detect_local_cpu): Detect cannonlake.
+ * config/i386/i386-c.c (ix86_target_macros_internal): Handle cannonlake.
+ * config/i386/i386.c (processor_costs): Add m_CANNONLAKE.
+ (PTA_CANNONLAKE): New.
+ (processor_target_table): Add cannonlake.
+ (ix86_option_override_internal): Ditto.
+ (fold_builtin_cpu): Ditto.
+ (get_builtin_code_for_version): Handle cannonlake.
+ (M_INTEL_COREI7_CANNONLAKE): New.
+ * config/i386/i386.h (TARGET_CANNONLAKE, PROCESSOR_CANNONLAKE): New.
+ * doc/invoke.texi: Add -march=cannonlake.
+
+2017-11-14 Boris Kolpackov <boris@codesynthesis.com>
* plugin.c (add_new_plugin): Use platform-specific library extensions.
(try_init_one_plugin): Alternative implementation for MinGW.
2017-11-23 Julia Koval <julia.koval@intel.com>
- config/i386/avx512vbmi2intrin.h (_mm512_mask_expand_epi8,
+ * config/i386/avx512vbmi2intrin.h (_mm512_mask_expand_epi8,
_mm512_maskz_expand_epi8, _mm512_mask_expandloadu_epi8,
_mm512_maskz_expandloadu_epi8, _mm512_mask_expand_epi16,
_mm512_maskz_expand_epi16, _mm512_mask_expandloadu_epi16,
_mm512_maskz_expandloadu_epi16): New intrinsics.
- config/i386/avx512vbmi2vlintrin.h (_mm_mask_expand_epi8,
+ * config/i386/avx512vbmi2vlintrin.h (_mm_mask_expand_epi8,
_mm_maskz_expand_epi8, _mm_mask_expandloadu_epi8,
_mm_maskz_expandloadu_epi8, _mm_mask_expand_epi16,
_mm_maskz_expand_epi16, _mm_mask_expandloadu_epi16,
_mm256_maskz_expandloadu_epi16, _mm256_mask_expand_epi8,
_mm256_maskz_expand_epi8, _mm256_mask_expandloadu_epi8,
_mm256_maskz_expandloadu_epi8): New intrinsics.
- config/i386/i386-builtin-types.def (V64QI_FTYPE_PCV64QI_V64QI_UDI,
+ * config/i386/i386-builtin-types.def (V64QI_FTYPE_PCV64QI_V64QI_UDI,
V32HI_FTYPE_PCV32HI_V32HI_USI, V32QI_FTYPE_PCV32QI_V32QI_USI,
V16HI_FTYPE_PCV16HI_V16HI_UHI, V16QI_FTYPE_PCV16QI_V16QI_UHI,
V8HI_FTYPE_PCV8HI_V8HI_UQI): New types.
- config/i386/i386.c (ix86_expand_special_args_builtin): Use new types.
- config/i386/sse.md (VI248_VLBW): New iterator.
+ * config/i386/i386.c (ix86_expand_special_args_builtin): Use new types.
+ * config/i386/sse.md (VI248_VLBW): New iterator.
(expand<mode>_mask, expand<mode>_maskz): New patterns.
2017-11-23 Julia Koval <julia.koval@intel.com>
- config.gcc (avx512vbmi2intrin.h, avx512vbmi2vlintrin): New headers.
- config/i386/avx512vbmi2intrin.h (_mm512_mask_compress_epi8,
+ * config.gcc (avx512vbmi2intrin.h, avx512vbmi2vlintrin.h): New headers.
+ * config/i386/avx512vbmi2intrin.h (_mm512_mask_compress_epi8,
_mm512_maskz_compress_epi8, _mm512_mask_compressstoreu_epi8,
_mm512_mask_compress_epi16, _mm512_maskz_compress_epi16,
_mm512_mask_compressstoreu_epi16): New.
- config/i386/avx512vbmi2vlintrin.h (_mm_mask_compress_epi8,
+ * config/i386/avx512vbmi2vlintrin.h (_mm_mask_compress_epi8,
_mm_maskz_compress_epi8, _mm256_mask_compressstoreu_epi16,
_mm_mask_compress_epi16, _mm_maskz_compress_epi16,
_mm256_mask_compress_epi16, _mm256_maskz_compress_epi16,
_mm_mask_compressstoreu_epi8, _mm_mask_compressstoreu_epi16,
_mm256_mask_compress_epi8, _mm256_maskz_compress_epi8,
_mm256_mask_compressstoreu_epi8): New.
- config/i386/i386-builtin-types.def (VOID_FTYPE_PV64QI_V64QI_UDI,
+ * config/i386/i386-builtin-types.def (VOID_FTYPE_PV64QI_V64QI_UDI,
VOID_FTYPE_PV32HI_V32HI_USI, VOID_FTYPE_PV32QI_V32QI_USI,
VOID_FTYPE_PV16QI_V16QI_UHI, VOID_FTYPE_PV16HI_V16HI_UHI,
VOID_FTYPE_PV8HI_V8HI_UQI): New types.
- config/i386/i386-builtin.def (__builtin_ia32_compressqi512_mask,
+ * config/i386/i386-builtin.def (__builtin_ia32_compressqi512_mask,
__builtin_ia32_compresshi512_mask, __builtin_ia32_compressqi256_mask,
__builtin_ia32_compressqi128_mask, __builtin_ia32_compresshi256_mask,
__builtin_ia32_compresshi128_mask,
__builtin_ia32_compressstoreuqi128_mask,
__builtin_ia32_compressstoreuhi256_mask,
__builtin_ia32_compressstoreuhi128_mask): New builtins.
- config/i386/i386.c (ix86_init_mmx_sse_builtins): Create special args
+ * config/i386/i386.c (ix86_init_mmx_sse_builtins): Create special args
array for flags2.
(ix86_expand_special_args_builtin): Handle new types.
(s4fma_expand): Handle new builtin array.
- config/i386/immintrin.h: Include new headers.
- config/i386/sse.md (VI12_AVX512VLBW): New iterator.
+ * config/i386/immintrin.h: Include new headers.
+ * config/i386/sse.md (VI12_AVX512VLBW): New iterator.
(compress<mode>_mask, compressstore<mode>_mask): New patterns.
2017-11-23 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386.opt (mavx512vbmi2): New option.
* doc/invoke.texi: Add new option.
-[2017-11-16 Julia Koval <julia.koval@intel.com>
+2017-11-16 Julia Koval <julia.koval@intel.com>
* config/i386/gfniintrin.h (_mm_gf2p8mul_epi8, _mm256_gf2p8mul_epi8,
_mm_mask_gf2p8mul_epi8, _mm_maskz_gf2p8mul_epi8,
bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \
core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \
sandybridge ivybridge haswell broadwell bonnell silvermont knl knm \
-skylake-avx512 x86-64 native"
+skylake-avx512 cannonlake x86-64 native"
# Additional x86 processors supported by --with-cpu=. Each processor
# MUST be separated by exactly one space.
/* Kaby Lake. */
cpu = "skylake";
break;
+ case 0x55:
+ /* Skylake with AVX-512. */
+ cpu = "skylake-avx512";
+ break;
case 0x57:
/* Knights Landing. */
cpu = "knl";
break;
+ case 0x66:
+ /* Cannon Lake. */
+ cpu = "cannonlake";
+ break;
case 0x85:
- /* Knights Mill. */
+ /* Knights Mill. */
cpu = "knm";
break;
default:
if (arch)
{
/* This is unknown family 0x6 CPU. */
- /* Assume Knights Landing. */
- if (has_avx512f)
- cpu = "knl";
- /* Assume Knights Mill */
+ /* Assume Cannon Lake. */
+ if (has_avx512vbmi)
+ cpu = "cannonlake";
+ /* Assume Knights Mill. */
else if (has_avx5124vnniw)
cpu = "knm";
+ /* Assume Knights Landing. */
+ else if (has_avx512er)
+ cpu = "knl";
+ /* Assume Skylake with AVX-512. */
+ else if (has_avx512f)
+ cpu = "skylake-avx512";
/* Assume Skylake. */
else if (has_clflushopt)
cpu = "skylake";
def_or_undef (parse_in, "__skylake_avx512");
def_or_undef (parse_in, "__skylake_avx512__");
break;
+ case PROCESSOR_CANNONLAKE:
+ def_or_undef (parse_in, "__cannonlake");
+ def_or_undef (parse_in, "__cannonlake__");
+ break;
/* use PROCESSOR_max to not set/unset the arch macro. */
case PROCESSOR_max:
break;
case PROCESSOR_SKYLAKE_AVX512:
def_or_undef (parse_in, "__tune_skylake_avx512__");
break;
+ case PROCESSOR_CANNONLAKE:
+ def_or_undef (parse_in, "__tune_cannonlake__");
+ break;
case PROCESSOR_LAKEMONT:
def_or_undef (parse_in, "__tune_lakemont__");
break;
#define m_KNL (1U<<PROCESSOR_KNL)
#define m_KNM (1U<<PROCESSOR_KNM)
#define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
+#define m_CANNONLAKE (1U<<PROCESSOR_CANNONLAKE)
#define m_INTEL (1U<<PROCESSOR_INTEL)
#define m_GEODE (1U<<PROCESSOR_GEODE)
{"silvermont", &slm_cost, 16, 15, 16, 7, 16},
{"knl", &slm_cost, 16, 15, 16, 7, 16},
{"knm", &slm_cost, 16, 15, 16, 7, 16},
- {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
+ {"skylake-avx512", &skylake_cost, 16, 10, 16, 10, 16},
+ {"cannonlake", &core_cost, 16, 10, 16, 10, 16},
{"intel", &intel_cost, 16, 15, 16, 7, 16},
{"geode", &geode_cost, 0, 0, 0, 0, 0},
{"k6", &k6_cost, 32, 7, 32, 7, 32},
#define PTA_SKYLAKE_AVX512 \
(PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
| PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
+#define PTA_CANNONLAKE \
+ (PTA_SKYLAKE_AVX512 | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA | PTA_CLWB)
#define PTA_KNL \
(PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
#define PTA_BONNELL \
{"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
{"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
{"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
- {"skylake-avx512", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL, PTA_SKYLAKE_AVX512},
+ {"skylake-avx512", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL,
+ PTA_SKYLAKE_AVX512},
+ {"cannonlake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_CANNONLAKE},
{"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
{"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
break;
case PROCESSOR_HASWELL:
case PROCESSOR_SKYLAKE_AVX512:
- if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
+ if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VBMI)
+ arg_str = "cannonlake";
+ else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
arg_str = "skylake-avx512";
else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
arg_str = "skylake";
M_INTEL_COREI7_HASWELL,
M_INTEL_COREI7_BROADWELL,
M_INTEL_COREI7_SKYLAKE,
- M_INTEL_COREI7_SKYLAKE_AVX512
+ M_INTEL_COREI7_SKYLAKE_AVX512,
+ M_INTEL_COREI7_CANNONLAKE
};
static struct _arch_names_table
{"broadwell", M_INTEL_COREI7_BROADWELL},
{"skylake", M_INTEL_COREI7_SKYLAKE},
{"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
+ {"cannonlake", M_INTEL_COREI7_CANNONLAKE},
{"bonnell", M_INTEL_BONNELL},
{"silvermont", M_INTEL_SILVERMONT},
{"knl", M_INTEL_KNL},
#define TARGET_KNL (ix86_tune == PROCESSOR_KNL)
#define TARGET_KNM (ix86_tune == PROCESSOR_KNM)
#define TARGET_SKYLAKE_AVX512 (ix86_tune == PROCESSOR_SKYLAKE_AVX512)
+#define TARGET_CANNONLAKE (ix86_tune == PROCESSOR_CANNONLAKE)
#define TARGET_INTEL (ix86_tune == PROCESSOR_INTEL)
#define TARGET_GENERIC (ix86_tune == PROCESSOR_GENERIC)
#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
PROCESSOR_KNL,
PROCESSOR_KNM,
PROCESSOR_SKYLAKE_AVX512,
+ PROCESSOR_CANNONLAKE,
PROCESSOR_INTEL,
PROCESSOR_GEODE,
PROCESSOR_K6,
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
};
+/* skylake_cost should produce code tuned for Skylake family of CPUs. */
+static stringop_algs skylake_memcpy[2] = {
+ {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
+ {libcall, {{16, loop, false}, {512, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+
+static stringop_algs skylake_memset[2] = {
+ {libcall, {{6, loop_1_byte, true},
+ {24, loop, true},
+ {8192, rep_prefix_4_byte, true},
+ {-1, libcall, false}}},
+ {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+
+static const
+struct processor_costs skylake_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1)+1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (4)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (8), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (76), /* DI */
+ COSTS_N_INSNS (76)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (0), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+
+ 6, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {6, 6, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 10}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 10, 20}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 10, 20}, /* cost of unaligned loads. */
+ {8, 8, 8, 8, 16}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
+ 2, 2, /* SSE->integer and integer->SSE moves */
+ 20, 8, /* Gather load static, per_elt. */
+ 22, 10, /* Gather store static, per_elt. */
+ 64, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (20), /* cost of FSQRT instruction. */
+
+ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
+ COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (4), /* cost of MULSS instruction. */
+ COSTS_N_INSNS (4), /* cost of MULSD instruction. */
+ COSTS_N_INSNS (4), /* cost of FMA SS instruction. */
+ COSTS_N_INSNS (4), /* cost of FMA SD instruction. */
+ COSTS_N_INSNS (11), /* cost of DIVSS instruction. */
+ COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
+ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
+ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ skylake_memcpy,
+ skylake_memset,
+ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
+ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+};
/* BTVER1 has optimized REP instruction for medium sized blocks, but for
very small blocks it is better to use loop. For large blocks, libcall can
do nontemporary accesses and beat inline considerably. */
BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, CLFLUSHOPT, XSAVEC, XSAVES, AVX512F,
AVX512VL, AVX512BW, AVX512DQ and AVX512CD instruction set support.
+@item cannonlake
+Intel Cannonlake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2,
+SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, PKU, AVX, AVX2, AES, PCLMUL, FSGSBASE,
+RDRND, FMA, BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, CLFLUSHOPT, XSAVEC,
+XSAVES, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD, AVX512VBMI,
+AVX512IFMA, SHA, CLWB and UMIP instruction set support.
+
@item k6
AMD K6 CPU with MMX instruction set support.
+2017-11-26 Julia Koval <julia.koval@intel.com>
+
+ * gcc.target/i386/funcspec-56.inc: Handle new march.
+ * g++.dg/ext/mv16.C: Ditto.
+
2017-11-25 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/81553
return 15;
}
+int __attribute__ ((target("arch=cannonlake"))) foo () {
+ return 16;
+}
+
int main ()
{
int val = foo ();
assert (val == 14);
else if (__builtin_cpu_is ("skylake-avx512"))
assert (val == 15);
+ else if (__builtin_cpu_is ("cannonlake"))
+ assert (val == 16);
else
assert (val == 0);
extern void test_arch_knl (void) __attribute__((__target__("arch=knl")));
extern void test_arch_knm (void) __attribute__((__target__("arch=knm")));
extern void test_arch_skylake_avx512 (void) __attribute__((__target__("arch=skylake-avx512")));
+extern void test_arch_cannonlake (void) __attribute__((__target__("arch=cannonlake")));
extern void test_arch_k8 (void) __attribute__((__target__("arch=k8")));
extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3")));
extern void test_arch_opteron (void) __attribute__((__target__("arch=opteron")));
+2017-11-26 Julia Koval <julia.koval@intel.com>
+
+ * config/i386/cpuinfo.c (get_intel_cpu): Handle cannonlake.
+ * config/i386/cpuinfo.h (processor_subtypes): Add
+ INTEL_COREI7_CANNONLAKE.
+
2017-11-20 Igor Tsimbalist <igor.v.tsimbalist@intel.com>
PR bootstrap/83015
__cpu_model.__cpu_type = INTEL_COREI7;
__cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE_AVX512;
break;
+ case 0x66:
+ /* Cannon Lake. */
+ __cpu_model.__cpu_type = INTEL_COREI7;
+ __cpu_model.__cpu_subtype = INTEL_COREI7_CANNONLAKE;
+ break;
case 0x17:
case 0x1d:
/* Penryn. */
INTEL_COREI7_BROADWELL,
INTEL_COREI7_SKYLAKE,
INTEL_COREI7_SKYLAKE_AVX512,
+ INTEL_COREI7_CANNONLAKE,
CPU_SUBTYPE_MAX
};