i386.c (processor_target_table): Add skylake_cost for skylake-avx512.

author Uros Bizjak <uros@gcc.gnu.org>

Sun, 26 Nov 2017 16:11:29 +0000 (17:11 +0100)

committer Uros Bizjak <uros@gcc.gnu.org>

Sun, 26 Nov 2017 16:11:29 +0000 (17:11 +0100)
author Uros Bizjak <uros@gcc.gnu.org>
Sun, 26 Nov 2017 16:11:29 +0000 (17:11 +0100)
committer Uros Bizjak <uros@gcc.gnu.org>
Sun, 26 Nov 2017 16:11:29 +0000 (17:11 +0100)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index e52effababaa81ca2219a3d48681d1fc1a70ac9d..20c4ab61f1fa91c9881c1ccdb6e4765082d31adf 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,31 @@
-2017-11-14 Boris Kolpackov  <boris@codesynthesis.com>
+2017-11-26  Julia Koval  <julia.koval@intel.com>
+
+       * config/i386/i386.c (processor_target_table): Add skylake_cost for
+       skylake-avx512.
+       * config/i386/x86-tune-costs.h (skylake_memcpy, skylake_memset,
+       skylake_cost): New.
+
+2017-11-26  Julia Koval  <julia.koval@intel.com>
+
+       * config/i386/driver-i386.c (host_detect_local_cpu):
+       Detect skylake-avx512.
+
+2017-11-26  Julia Koval  <julia.koval@intel.com>
+
+       * config.gcc: Add -march=cannonlake.
+       * config/i386/driver-i386.c (host_detect_local_cpu): Detect cannonlake.
+       * config/i386/i386-c.c (ix86_target_macros_internal): Handle cannonlake.
+       * config/i386/i386.c (processor_costs): Add m_CANNONLAKE.
+       (PTA_CANNONLAKE): New.
+       (processor_target_table): Add cannonlake.
+       (ix86_option_override_internal): Ditto.
+       (fold_builtin_cpu): Ditto.
+       (get_builtin_code_for_version): Handle cannonlake.
+       (M_INTEL_COREI7_CANNONLAKE): New.
+       * config/i386/i386.h (TARGET_CANNONLAKE, PROCESSOR_CANNONLAKE): New.
+       * doc/invoke.texi: Add -march=cannonlake.
+
+2017-11-14  Boris Kolpackov  <boris@codesynthesis.com>
  
         * plugin.c (add_new_plugin): Use platform-specific library extensions.
         (try_init_one_plugin): Alternative implementation for MinGW.
@@ -69,12 +96,12 @@
  
  2017-11-23  Julia Koval  <julia.koval@intel.com>
  
-       config/i386/avx512vbmi2intrin.h (_mm512_mask_expand_epi8,
+       * config/i386/avx512vbmi2intrin.h (_mm512_mask_expand_epi8,
         _mm512_maskz_expand_epi8, _mm512_mask_expandloadu_epi8,
         _mm512_maskz_expandloadu_epi8, _mm512_mask_expand_epi16,
         _mm512_maskz_expand_epi16, _mm512_mask_expandloadu_epi16,
         _mm512_maskz_expandloadu_epi16): New intrinsics.
-       config/i386/avx512vbmi2vlintrin.h (_mm_mask_expand_epi8,
+       * config/i386/avx512vbmi2vlintrin.h (_mm_mask_expand_epi8,
         _mm_maskz_expand_epi8, _mm_mask_expandloadu_epi8,
         _mm_maskz_expandloadu_epi8, _mm_mask_expand_epi16,
         _mm_maskz_expand_epi16, _mm_mask_expandloadu_epi16,
@@ -83,33 +110,33 @@
         _mm256_maskz_expandloadu_epi16, _mm256_mask_expand_epi8,
         _mm256_maskz_expand_epi8, _mm256_mask_expandloadu_epi8,
         _mm256_maskz_expandloadu_epi8): New intrinsics.
-       config/i386/i386-builtin-types.def (V64QI_FTYPE_PCV64QI_V64QI_UDI,
+       * config/i386/i386-builtin-types.def (V64QI_FTYPE_PCV64QI_V64QI_UDI,
         V32HI_FTYPE_PCV32HI_V32HI_USI, V32QI_FTYPE_PCV32QI_V32QI_USI,
         V16HI_FTYPE_PCV16HI_V16HI_UHI, V16QI_FTYPE_PCV16QI_V16QI_UHI,
         V8HI_FTYPE_PCV8HI_V8HI_UQI): New types.
-       config/i386/i386.c (ix86_expand_special_args_builtin): Use new types.
-       config/i386/sse.md (VI248_VLBW): New iterator.
+       * config/i386/i386.c (ix86_expand_special_args_builtin): Use new types.
+       * config/i386/sse.md (VI248_VLBW): New iterator.
         (expand<mode>_mask, expand<mode>_maskz): New patterns.
  
  2017-11-23  Julia Koval  <julia.koval@intel.com>
  
-       config.gcc (avx512vbmi2intrin.h, avx512vbmi2vlintrin): New headers.
-       config/i386/avx512vbmi2intrin.h (_mm512_mask_compress_epi8,
+       * config.gcc (avx512vbmi2intrin.h, avx512vbmi2vlintrin): New headers.
+       * config/i386/avx512vbmi2intrin.h (_mm512_mask_compress_epi8,
         _mm512_maskz_compress_epi8, _mm512_mask_compressstoreu_epi8,
         _mm512_mask_compress_epi16, _mm512_maskz_compress_epi16,
         _mm512_mask_compressstoreu_epi16): New.
-       config/i386/avx512vbmi2vlintrin.h (_mm_mask_compress_epi8,
+       * config/i386/avx512vbmi2vlintrin.h (_mm_mask_compress_epi8,
         _mm_maskz_compress_epi8, _mm256_mask_compressstoreu_epi16,
         _mm_mask_compress_epi16, _mm_maskz_compress_epi16,
         _mm256_mask_compress_epi16, _mm256_maskz_compress_epi16,
         _mm_mask_compressstoreu_epi8, _mm_mask_compressstoreu_epi16,
         _mm256_mask_compress_epi8, _mm256_maskz_compress_epi8,
         _mm256_mask_compressstoreu_epi8): New.
-       config/i386/i386-builtin-types.def (VOID_FTYPE_PV64QI_V64QI_UDI,
+       * config/i386/i386-builtin-types.def (VOID_FTYPE_PV64QI_V64QI_UDI,
         VOID_FTYPE_PV32HI_V32HI_USI, VOID_FTYPE_PV32QI_V32QI_USI,
         VOID_FTYPE_PV16QI_V16QI_UHI, VOID_FTYPE_PV16HI_V16HI_UHI,
         VOID_FTYPE_PV8HI_V8HI_UQI): New types.
-       config/i386/i386-builtin.def (__builtin_ia32_compressqi512_mask,
+       * config/i386/i386-builtin.def (__builtin_ia32_compressqi512_mask,
         __builtin_ia32_compresshi512_mask, __builtin_ia32_compressqi256_mask,
         __builtin_ia32_compressqi128_mask, __builtin_ia32_compresshi256_mask,
         __builtin_ia32_compresshi128_mask,
@@ -119,12 +146,12 @@
         __builtin_ia32_compressstoreuqi128_mask,
         __builtin_ia32_compressstoreuhi256_mask,
         __builtin_ia32_compressstoreuhi128_mask): New builtins.
-       config/i386/i386.c (ix86_init_mmx_sse_builtins): Create special args
+       * config/i386/i386.c (ix86_init_mmx_sse_builtins): Create special args
         array for flags2.
         (ix86_expand_special_args_builtin): Handle new types.
         (s4fma_expand): Handle new builtin array.
-       config/i386/immintrin.h: Include new headers.
-       config/i386/sse.md (VI12_AVX512VLBW): New iterator.
+       * config/i386/immintrin.h: Include new headers.
+       * config/i386/sse.md (VI12_AVX512VLBW): New iterator.
         (compress<mode>_mask, compressstore<mode>_mask): New patterns.
  
  2017-11-23  Jakub Jelinek  <jakub@redhat.com>
@@ -1438,7 +1465,7 @@
         * config/i386/i386.opt (mavx512vbmi2): New option.
         * doc/invoke.texi: Add new option.
  
-[2017-11-16  Julia Koval  <julia.koval@intel.com>
+2017-11-16  Julia Koval  <julia.koval@intel.com>
  
         * config/i386/gfniintrin.h (_mm_gf2p8mul_epi8, _mm256_gf2p8mul_epi8,
         _mm_mask_gf2p8mul_epi8, _mm_maskz_gf2p8mul_epi8,
diff --git a/gcc/config.gcc b/gcc/config.gcc

index eb41faf74ba33d26d15b217703574f7e1e9ca1dd..ff870c9357d9d7a9838812211520fdb573b8a7d8 100644 (file)
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -631,7 +631,7 @@ x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
  bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \
  core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \
  sandybridge ivybridge haswell broadwell bonnell silvermont knl knm \
-skylake-avx512 x86-64 native"
+skylake-avx512 cannonlake x86-64 native"
  
  # Additional x86 processors supported by --with-cpu=.  Each processor
  # MUST be separated by exactly one space.
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c

index 8482eeba3d6ebdda91f419db08ca5d9264bdb35c..cfa8bd498a31ae6dc2cf5836273ae2e1fb3ff4ee 100644 (file)
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -793,24 +793,38 @@ const char *host_detect_local_cpu (int argc, const char **argv)
           /* Kaby Lake.  */
           cpu = "skylake";
           break;
+       case 0x55:
+         /* Skylake with AVX-512.  */
+         cpu = "skylake-avx512";
+         break;
         case 0x57:
           /* Knights Landing.  */
           cpu = "knl";
           break;
+       case 0x66:
+         /* Cannon Lake.  */
+         cpu = "cannonlake";
+         break;
         case 0x85:
-         /* Knights Mill. */
+         /* Knights Mill.  */
           cpu = "knm";
           break;
         default:
           if (arch)
             {
               /* This is unknown family 0x6 CPU.  */
-             /* Assume Knights Landing.  */
-             if (has_avx512f)
-               cpu = "knl";
-             /* Assume Knights Mill */
+             /* Assume Cannon Lake.  */
+             if (has_avx512vbmi)
+               cpu = "cannonlake";
+             /* Assume Knights Mill.  */
               else if (has_avx5124vnniw)
                 cpu = "knm";
+             /* Assume Knights Landing.  */
+             else if (has_avx512er)
+               cpu = "knl";
+             /* Assume Skylake with AVX-512.  */
+             else if (has_avx512f)
+               cpu = "skylake-avx512";
               /* Assume Skylake.  */
               else if (has_clflushopt)
                 cpu = "skylake";
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c

index e812ce99b79e9552d9396df93499d6d982ebf848..d1d522aa0b56e0865e7b13b3f30cb55c4a93d4d5 100644 (file)
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -184,6 +184,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
        def_or_undef (parse_in, "__skylake_avx512");
        def_or_undef (parse_in, "__skylake_avx512__");
        break;
+    case PROCESSOR_CANNONLAKE:
+      def_or_undef (parse_in, "__cannonlake");
+      def_or_undef (parse_in, "__cannonlake__");
+      break;
      /* use PROCESSOR_max to not set/unset the arch macro.  */
      case PROCESSOR_max:
        break;
@@ -302,6 +306,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
      case PROCESSOR_SKYLAKE_AVX512:
        def_or_undef (parse_in, "__tune_skylake_avx512__");
        break;
+    case PROCESSOR_CANNONLAKE:
+      def_or_undef (parse_in, "__tune_cannonlake__");
+      break;
      case PROCESSOR_LAKEMONT:
        def_or_undef (parse_in, "__tune_lakemont__");
        break;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

index 2e528396982efebfe56cc75487da810ec8669b06..5f0358cab872d502c87be09427e2242676a51a00 100644 (file)
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -142,6 +142,7 @@ const struct processor_costs *ix86_cost = NULL;
  #define m_KNL (1U<<PROCESSOR_KNL)
  #define m_KNM (1U<<PROCESSOR_KNM)
  #define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
+#define m_CANNONLAKE (1U<<PROCESSOR_CANNONLAKE)
  #define m_INTEL (1U<<PROCESSOR_INTEL)
  
  #define m_GEODE (1U<<PROCESSOR_GEODE)
@@ -853,7 +854,8 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
    {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
    {"knl", &slm_cost, 16, 15, 16, 7, 16},
    {"knm", &slm_cost, 16, 15, 16, 7, 16},
-  {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
+  {"skylake-avx512", &skylake_cost, 16, 10, 16, 10, 16},
+  {"cannonlake", &core_cost, 16, 10, 16, 10, 16},
    {"intel", &intel_cost, 16, 15, 16, 7, 16},
    {"geode", &geode_cost, 0, 0, 0, 0, 0},
    {"k6", &k6_cost, 32, 7, 32, 7, 32},
@@ -3444,6 +3446,8 @@ ix86_option_override_internal (bool main_args_p,
  #define PTA_SKYLAKE_AVX512 \
    (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
     | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
+#define PTA_CANNONLAKE \
+  (PTA_SKYLAKE_AVX512 | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA | PTA_CLWB)
  #define PTA_KNL \
    (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
  #define PTA_BONNELL \
@@ -3516,7 +3520,9 @@ ix86_option_override_internal (bool main_args_p,
        {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
        {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
        {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
-      {"skylake-avx512", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL, PTA_SKYLAKE_AVX512},
+      {"skylake-avx512", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL,
+        PTA_SKYLAKE_AVX512},
+      {"cannonlake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_CANNONLAKE},
        {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
        {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
        {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
@@ -31230,7 +31236,9 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
               break;
             case PROCESSOR_HASWELL:
             case PROCESSOR_SKYLAKE_AVX512:
-             if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
+             if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VBMI)
+               arg_str = "cannonlake";
+             else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
                 arg_str = "skylake-avx512";
               else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
                 arg_str = "skylake";
@@ -31952,7 +31960,8 @@ fold_builtin_cpu (tree fndecl, tree *args)
      M_INTEL_COREI7_HASWELL,
      M_INTEL_COREI7_BROADWELL,
      M_INTEL_COREI7_SKYLAKE,
-    M_INTEL_COREI7_SKYLAKE_AVX512
+    M_INTEL_COREI7_SKYLAKE_AVX512,
+    M_INTEL_COREI7_CANNONLAKE
    };
  
    static struct _arch_names_table
@@ -31976,6 +31985,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
        {"broadwell", M_INTEL_COREI7_BROADWELL},
        {"skylake", M_INTEL_COREI7_SKYLAKE},
        {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
+      {"cannonlake", M_INTEL_COREI7_CANNONLAKE},
        {"bonnell", M_INTEL_BONNELL},
        {"silvermont", M_INTEL_SILVERMONT},
        {"knl", M_INTEL_KNL},
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

index 630b7c5a4aa8cfc9a0ffd5230a833ebd4607e76e..638f1f15bd8194517f02b38a9d653cb75e064d52 100644 (file)
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -374,6 +374,7 @@ extern const struct processor_costs ix86_size_cost;
  #define TARGET_KNL (ix86_tune == PROCESSOR_KNL)
  #define TARGET_KNM (ix86_tune == PROCESSOR_KNM)
  #define TARGET_SKYLAKE_AVX512 (ix86_tune == PROCESSOR_SKYLAKE_AVX512)
+#define TARGET_CANNONLAKE (ix86_tune == PROCESSOR_CANNONLAKE)
  #define TARGET_INTEL (ix86_tune == PROCESSOR_INTEL)
  #define TARGET_GENERIC (ix86_tune == PROCESSOR_GENERIC)
  #define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
@@ -2255,6 +2256,7 @@ enum processor_type
    PROCESSOR_KNL,
    PROCESSOR_KNM,
    PROCESSOR_SKYLAKE_AVX512,
+  PROCESSOR_CANNONLAKE,
    PROCESSOR_INTEL,
    PROCESSOR_GEODE,
    PROCESSOR_K6,
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h

index c7ac70e8453e0336370652f2683a37418e95d52e..75a59063806b2dd4d211a4969bd7a7b9f282e87a 100644 (file)
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1515,6 +1515,95 @@ struct processor_costs znver1_cost = {
    COSTS_N_INSNS (2),                   /* cond_not_taken_branch_cost.  */
  };
  
+/* skylake_cost should produce code tuned for Skylake family of CPUs.  */
+static stringop_algs skylake_memcpy[2] =   {
+  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
+  {libcall, {{16, loop, false}, {512, rep_prefix_8_byte, false},
+             {-1, libcall, false}}}};
+
+static stringop_algs skylake_memset[2] = {
+  {libcall, {{6, loop_1_byte, true},
+             {24, loop, true},
+             {8192, rep_prefix_4_byte, true},
+             {-1, libcall, false}}},
+  {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, false},
+             {-1, libcall, false}}}};
+
+static const
+struct processor_costs skylake_cost = {
+  COSTS_N_INSNS (1),                   /* cost of an add instruction */
+  COSTS_N_INSNS (1)+1,         /* cost of a lea instruction */
+  COSTS_N_INSNS (1),                   /* variable shift costs */
+  COSTS_N_INSNS (1),                   /* constant shift costs */
+  {COSTS_N_INSNS (3),                  /* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),                  /*                               HI */
+   COSTS_N_INSNS (3),                  /*                               SI */
+   COSTS_N_INSNS (4),                  /*                               DI */
+   COSTS_N_INSNS (4)},                 /*                            other */
+  0,                                   /* cost of multiply per each bit set */
+  {COSTS_N_INSNS (8),                  /* cost of a divide/mod for QI */
+   COSTS_N_INSNS (8),                  /*                          HI */
+   COSTS_N_INSNS (11),                 /*                          SI */
+   COSTS_N_INSNS (76),                 /*                          DI */
+   COSTS_N_INSNS (76)},                        /*                          other */
+  COSTS_N_INSNS (1),                   /* cost of movsx */
+  COSTS_N_INSNS (0),                   /* cost of movzx */
+  8,                                   /* "large" insn */
+  17,                                  /* MOVE_RATIO */
+
+  6,                                /* cost for loading QImode using movzbl */
+  {4, 4, 4},                           /* cost of loading integer registers
+                                          in QImode, HImode and SImode.
+                                          Relative to reg-reg move (2).  */
+  {6, 6, 6},                           /* cost of storing integer registers */
+  2,                                   /* cost of reg,reg fld/fst */
+  {6, 6, 8},                           /* cost of loading fp registers
+                                          in SFmode, DFmode and XFmode */
+  {6, 6, 10},                          /* cost of storing fp registers
+                                          in SFmode, DFmode and XFmode */
+  2,                                   /* cost of moving MMX register */
+  {6, 6},                              /* cost of loading MMX registers
+                                          in SImode and DImode */
+  {6, 6},                              /* cost of storing MMX registers
+                                          in SImode and DImode */
+  2, 2, 4,                             /* cost of moving XMM,YMM,ZMM register */
+  {6, 6, 6, 10, 20},                   /* cost of loading SSE registers
+                                          in 32,64,128,256 and 512-bit */
+  {6, 6, 6, 10, 20},                   /* cost of unaligned loads.  */
+  {8, 8, 8, 8, 16},                    /* cost of storing SSE registers
+                                          in 32,64,128,256 and 512-bit */
+  {8, 8, 8, 8, 16},                    /* cost of unaligned stores.  */
+  2, 2,                                        /* SSE->integer and integer->SSE moves */
+  20, 8,                               /* Gather load static, per_elt.  */
+  22, 10,                              /* Gather store static, per_elt.  */
+  64,                                  /* size of l1 cache.  */
+  512,                                 /* size of l2 cache.  */
+  64,                                  /* size of prefetch block */
+  6,                                   /* number of parallel prefetches */
+  3,                                   /* Branch cost */
+  COSTS_N_INSNS (3),                   /* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (4),                   /* cost of FMUL instruction.  */
+  COSTS_N_INSNS (20),                  /* cost of FDIV instruction.  */
+  COSTS_N_INSNS (1),                   /* cost of FABS instruction.  */
+  COSTS_N_INSNS (1),                   /* cost of FCHS instruction.  */
+  COSTS_N_INSNS (20),                  /* cost of FSQRT instruction.  */
+
+  COSTS_N_INSNS (1),                   /* cost of cheap SSE instruction.  */
+  COSTS_N_INSNS (4),                   /* cost of ADDSS/SD SUBSS/SD insns.  */
+  COSTS_N_INSNS (4),                   /* cost of MULSS instruction.  */
+  COSTS_N_INSNS (4),                   /* cost of MULSD instruction.  */
+  COSTS_N_INSNS (4),                   /* cost of FMA SS instruction.  */
+  COSTS_N_INSNS (4),                   /* cost of FMA SD instruction.  */
+  COSTS_N_INSNS (11),                  /* cost of DIVSS instruction.  */
+  COSTS_N_INSNS (14),                  /* cost of DIVSD instruction.  */
+  COSTS_N_INSNS (12),                  /* cost of SQRTSS instruction.  */
+  COSTS_N_INSNS (18),                  /* cost of SQRTSD instruction.  */
+  1, 4, 2, 2,                          /* reassoc int, fp, vec_int, vec_fp.  */
+  skylake_memcpy,
+  skylake_memset,
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
+};
    /* BTVER1 has optimized REP instruction for medium sized blocks, but for
       very small blocks it is better to use loop. For large blocks, libcall can
      do nontemporal accesses and beat inline considerably.  */
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

index eeb87563dc8d7b32ad8dd225eca3045995a48007..cccf39f0b9b0acef33c5d6773bca5d31800f1f45 100644 (file)
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -25321,6 +25321,13 @@ SSSE3, SSE4.1, SSE4.2, POPCNT, PKU, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA
  BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, CLFLUSHOPT, XSAVEC, XSAVES, AVX512F,
  AVX512VL, AVX512BW, AVX512DQ and AVX512CD instruction set support.
  
+@item cannonlake
+Intel Cannonlake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2,
+SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, PKU, AVX, AVX2, AES, PCLMUL, FSGSBASE,
+RDRND, FMA, BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, CLFLUSHOPT, XSAVEC,
+XSAVES, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD, AVX512VBMI,
+AVX512IFMA, SHA, CLWB and UMIP instruction set support.
+
  @item k6
  AMD K6 CPU with MMX instruction set support.
  
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index f0169279cd1b490c0d0b5dcd225c4fd66119191f..f750ad7b333db1f5ac8b6f12873871df03bbb070 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-11-26  Julia Koval  <julia.koval@intel.com>
+
+       * gcc.target/i386/funcspec-56.inc: Handle new march.
+       * g++.dg/ext/mv16.C: Ditto.
+
  2017-11-25  Jakub Jelinek  <jakub@redhat.com>
  
         PR rtl-optimization/81553
diff --git a/gcc/testsuite/g++.dg/ext/mv16.C b/gcc/testsuite/g++.dg/ext/mv16.C

index a3a0fe804fd6f248a19b2e1a94993ffe1abcae30..1e1ae3d2f0d45fad68750ff3f90fcf848df813dc 100644 (file)
--- a/gcc/testsuite/g++.dg/ext/mv16.C
+++ b/gcc/testsuite/g++.dg/ext/mv16.C
@@ -56,6 +56,10 @@ int __attribute__ ((target("arch=skylake-avx512"))) foo () {
    return 15;
  }
  
+int __attribute__ ((target("arch=cannonlake"))) foo () {
+  return 16;
+}
+
  int main ()
  {
    int val = foo ();
@@ -76,6 +80,8 @@ int main ()
      assert (val == 14);
    else if (__builtin_cpu_is ("skylake-avx512"))
      assert (val == 15);
+  else if (__builtin_cpu_is ("cannonlake"))
+    assert (val == 16);
    else
      assert (val == 0);
  
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc

index 9ae74cb9581f5a557c2b26cc349412e2b21c10f6..ed0748bd95d95546d7bb2b4dbd34fc5693e60987 100644 (file)
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
@@ -144,6 +144,7 @@ extern void test_arch_core_avx2 (void)              __attribute__((__target__("arch=core-avx
  extern void test_arch_knl (void)               __attribute__((__target__("arch=knl")));
  extern void test_arch_knm (void)               __attribute__((__target__("arch=knm")));
  extern void test_arch_skylake_avx512 (void)    __attribute__((__target__("arch=skylake-avx512")));
+extern void test_arch_cannonlake (void)                __attribute__((__target__("arch=cannonlake")));
  extern void test_arch_k8 (void)                        __attribute__((__target__("arch=k8")));
  extern void test_arch_k8_sse3 (void)           __attribute__((__target__("arch=k8-sse3")));
  extern void test_arch_opteron (void)           __attribute__((__target__("arch=opteron")));
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog

index 4d3539ed0d1c491971ec12e8efa9e82b06ff9f14..6e4db56a4c9bf7b1a29f512336639002889a44ea 100644 (file)
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,9 @@
+2017-11-26  Julia Koval  <julia.koval@intel.com>
+
+       * config/i386/cpuinfo.c (get_intel_cpu): Handle cannonlake.
+       * config/i386/cpuinfo.h (processor_subtypes): Add
+       INTEL_COREI7_CANNONLAKE.
+
  2017-11-20  Igor Tsimbalist  <igor.v.tsimbalist@intel.com>
  
         PR bootstrap/83015
diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c

index c2ab8bed88eec5d6261bc67107fbfd8648cf6f38..15799f4c4972fbc944f3a8d249b3d7099b6d78d7 100644 (file)
--- a/libgcc/config/i386/cpuinfo.c
+++ b/libgcc/config/i386/cpuinfo.c
@@ -198,6 +198,11 @@ get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
               __cpu_model.__cpu_type = INTEL_COREI7;
               __cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE_AVX512;
               break;
+           case 0x66:
+             /* Cannon Lake.  */
+             __cpu_model.__cpu_type = INTEL_COREI7;
+             __cpu_model.__cpu_subtype = INTEL_COREI7_CANNONLAKE;
+             break;
             case 0x17:
             case 0x1d:
               /* Penryn.  */
diff --git a/libgcc/config/i386/cpuinfo.h b/libgcc/config/i386/cpuinfo.h

index 397840160c08b707ac33b7704525ecc17ddf49d7..4b2a3c534c8e67bb2635100f36962fbca9f2e9fa 100644 (file)
--- a/libgcc/config/i386/cpuinfo.h
+++ b/libgcc/config/i386/cpuinfo.h
@@ -69,6 +69,7 @@ enum processor_subtypes
    INTEL_COREI7_BROADWELL,
    INTEL_COREI7_SKYLAKE,
    INTEL_COREI7_SKYLAKE_AVX512,
+  INTEL_COREI7_CANNONLAKE,
    CPU_SUBTYPE_MAX
  };
author	Uros Bizjak <uros@gcc.gnu.org>
	Sun, 26 Nov 2017 16:11:29 +0000 (17:11 +0100)
committer	Uros Bizjak <uros@gcc.gnu.org>
	Sun, 26 Nov 2017 16:11:29 +0000 (17:11 +0100)
gcc/ChangeLog		patch \| blob \| history
gcc/config.gcc		patch \| blob \| history
gcc/config/i386/driver-i386.c		patch \| blob \| history
gcc/config/i386/i386-c.c		patch \| blob \| history
gcc/config/i386/i386.c		patch \| blob \| history
gcc/config/i386/i386.h		patch \| blob \| history
gcc/config/i386/x86-tune-costs.h		patch \| blob \| history
gcc/doc/invoke.texi		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/g++.dg/ext/mv16.C		patch \| blob \| history
gcc/testsuite/gcc.target/i386/funcspec-56.inc		patch \| blob \| history
libgcc/ChangeLog		patch \| blob \| history
libgcc/config/i386/cpuinfo.c		patch \| blob \| history
libgcc/config/i386/cpuinfo.h		patch \| blob \| history