anv: device: calculate compute thread numbers using subslices numbers

author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 7 Sep 2016 16:19:35 +0000 (17:19 +0100)
committer Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 21 Sep 2016 09:01:06 +0000 (12:01 +0300)
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c

index c1687b989794028cfb8648fa2ae679f974da94e4..f694aee92d676cf2ad67a60eadcb21633a3508c7 100644 (file)
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -924,14 +924,15 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
     if (size == 0) {
        /* We own the lock.  Allocate a buffer */
  
-      struct gen_device_info *devinfo = &device->info;
+      struct anv_physical_device *physical_device =
+         &device->instance->physicalDevice;
        uint32_t max_threads[] = {
-         [MESA_SHADER_VERTEX]                  = devinfo->max_vs_threads,
-         [MESA_SHADER_TESS_CTRL]               = devinfo->max_hs_threads,
-         [MESA_SHADER_TESS_EVAL]               = devinfo->max_ds_threads,
-         [MESA_SHADER_GEOMETRY]                = devinfo->max_gs_threads,
-         [MESA_SHADER_FRAGMENT]                = devinfo->max_wm_threads,
-         [MESA_SHADER_COMPUTE]                 = devinfo->max_cs_threads,
+         [MESA_SHADER_VERTEX]           = physical_device->max_vs_threads,
+         [MESA_SHADER_TESS_CTRL]        = physical_device->max_hs_threads,
+         [MESA_SHADER_TESS_EVAL]        = physical_device->max_ds_threads,
+         [MESA_SHADER_GEOMETRY]         = physical_device->max_gs_threads,
+         [MESA_SHADER_FRAGMENT]         = physical_device->max_wm_threads,
+         [MESA_SHADER_COMPUTE]          = physical_device->max_cs_threads,
        };
  
        size = per_thread_scratch * max_threads[stage];
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c

index e66f81252d1f2059c494bd9bdfa2d43e909d6771..fecb8505b89c574bed362c72dd4471e8afa34bf1 100644 (file)
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -136,6 +136,41 @@ anv_physical_device_init(struct anv_physical_device *device,
  
     bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
  
+   device->max_vs_threads = device->info->max_vs_threads;
+   device->max_hs_threads = device->info->max_hs_threads;
+   device->max_ds_threads = device->info->max_ds_threads;
+   device->max_gs_threads = device->info->max_gs_threads;
+   device->max_wm_threads = device->info->max_wm_threads;
+
+   /* GENs prior to 8 do not support EU/Subslice info */
+   if (device->info->gen >= 8) {
+      device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
+      device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);
+
+      /* Without this information, we cannot get the right Braswell
+       * brandstrings, and we have to use conservative numbers for GPGPU on
+       * many platforms, but otherwise, things will just work.
+       */
+      if (device->subslice_total < 1 || device->eu_total < 1) {
+         fprintf(stderr, "WARNING: Kernel 4.1 required to properly"
+                         " query GPU properties.\n");
+      }
+   } else if (device->info->gen == 7) {
+      device->subslice_total = 1 << (device->info->gt - 1);
+   }
+
+   if (device->info->is_cherryview &&
+       device->subslice_total > 0 && device->eu_total > 0) {
+      /* Logical CS threads = EUs per subslice * 7 threads per EU */
+      device->max_cs_threads = device->eu_total / device->subslice_total * 7;
+
+      /* Fuse configurations may give more threads than expected, never less. */
+      if (device->max_cs_threads < device->info->max_cs_threads)
+         device->max_cs_threads = device->info->max_cs_threads;
+   } else {
+      device->max_cs_threads = device->info->max_cs_threads;
+   }
+
     close(fd);
  
     brw_process_intel_debug_variable();
@@ -503,11 +538,11 @@ void anv_GetPhysicalDeviceProperties(
        .maxFragmentCombinedOutputResources       = 8,
        .maxComputeSharedMemorySize               = 32768,
        .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
-      .maxComputeWorkGroupInvocations           = 16 * devinfo->max_cs_threads,
+      .maxComputeWorkGroupInvocations           = 16 * pdevice->max_cs_threads,
        .maxComputeWorkGroupSize = {
-         16 * devinfo->max_cs_threads,
-         16 * devinfo->max_cs_threads,
-         16 * devinfo->max_cs_threads,
+         16 * pdevice->max_cs_threads,
+         16 * pdevice->max_cs_threads,
+         16 * pdevice->max_cs_threads,
        },
        .subPixelPrecisionBits                    = 4 /* FIXME */,
        .subTexelPrecisionBits                    = 4 /* FIXME */,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h

index a083f93fc640a1915454ddb95cd09407dbd9ba7a..f578a9d9a85976f9b415723fa0022d3fa2381438 100644 (file)
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -570,6 +570,20 @@ struct anv_physical_device {
      struct isl_device                           isl_dev;
      int                                         cmd_parser_version;
  
+    uint32_t                                    eu_total;
+    uint32_t                                    subslice_total;
+
+    /**
+     * Platform specific constants containing the maximum number of threads
+     * for each pipeline stage.
+     */
+    uint32_t                                    max_vs_threads;
+    uint32_t                                    max_hs_threads;
+    uint32_t                                    max_ds_threads;
+    uint32_t                                    max_gs_threads;
+    uint32_t                                    max_wm_threads;
+    uint32_t                                    max_cs_threads;
+
      struct anv_wsi_interface *                  wsi[VK_ICD_WSI_PLATFORM_MAX];
  };
  
diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c

index 48457aa9a334e26d321ecd331d009c5d95577378..878308b7fbc1405209cb94e42e7a528c885b1052 100644 (file)
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -45,6 +45,8 @@ genX(graphics_pipeline_create)(
  {
     ANV_FROM_HANDLE(anv_device, device, _device);
     ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
+   struct anv_physical_device *physical_device =
+      &device->instance->physicalDevice;
     struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
     struct anv_pipeline *pipeline;
     VkResult result;
@@ -123,7 +125,7 @@ genX(graphics_pipeline_create)(
  
           vs.VertexURBEntryReadLength   = vs_prog_data->base.urb_read_length;
           vs.VertexURBEntryReadOffset   = 0;
-         vs.MaximumNumberofThreads     = device->info.max_vs_threads - 1;
+         vs.MaximumNumberofThreads     = physical_device->max_vs_threads - 1;
           vs.StatisticsEnable           = true;
           vs.VSFunctionEnable           = true;
        }
@@ -152,7 +154,7 @@ genX(graphics_pipeline_create)(
           gs.DispatchGRFStartRegisterforURBData =
              gs_prog_data->base.base.dispatch_grf_start_reg;
  
-         gs.MaximumNumberofThreads     = device->info.max_gs_threads - 1;
+         gs.MaximumNumberofThreads     = physical_device->max_gs_threads - 1;
           /* This in the next dword on HSW. */
           gs.ControlDataFormat          = gs_prog_data->control_data_format;
           gs.ControlDataHeaderSize      = gs_prog_data->control_data_header_size_hwords;
@@ -185,7 +187,7 @@ genX(graphics_pipeline_create)(
         * don't at least set the maximum number of threads.
         */
        anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
-         ps.MaximumNumberofThreads = device->info.max_wm_threads - 1;
+         ps.MaximumNumberofThreads = physical_device->max_wm_threads - 1;
        }
     } else {
        const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
@@ -207,7 +209,7 @@ genX(graphics_pipeline_create)(
              .offset = 0,
           };
           ps.PerThreadScratchSpace         = scratch_space(&wm_prog_data->base);
-         ps.MaximumNumberofThreads        = device->info.max_wm_threads - 1;
+         ps.MaximumNumberofThreads        = physical_device->max_wm_threads - 1;
           ps.PushConstantEnable            = wm_prog_data->base.nr_params > 0;
           ps.AttributeEnable               = wm_prog_data->num_varying_inputs > 0;
           ps.oMaskPresenttoRenderTarget    = wm_prog_data->uses_omask;
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c

index 445177a84f5e02b1059aab37b443309976c5aa9d..9f16960ccc7dc66ad3327b1afa5cda4921102933 100644 (file)
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -55,6 +55,8 @@ genX(graphics_pipeline_create)(
  {
     ANV_FROM_HANDLE(anv_device, device, _device);
     ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
+   struct anv_physical_device *physical_device =
+      &device->instance->physicalDevice;
     struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
     struct anv_pipeline *pipeline;
     VkResult result;
@@ -142,7 +144,7 @@ genX(graphics_pipeline_create)(
           gs.DispatchGRFStartRegisterForURBData =
              gs_prog_data->base.base.dispatch_grf_start_reg;
  
-         gs.MaximumNumberofThreads  = device->info.max_gs_threads / 2 - 1;
+         gs.MaximumNumberofThreads  = physical_device->max_gs_threads / 2 - 1;
           gs.ControlDataHeaderSize   = gs_prog_data->control_data_header_size_hwords;
           gs.DispatchMode            = gs_prog_data->base.dispatch_mode;
           gs.StatisticsEnable        = true;
@@ -213,7 +215,7 @@ genX(graphics_pipeline_create)(
           vs.VertexURBEntryReadLength      = vs_prog_data->base.urb_read_length;
           vs.VertexURBEntryReadOffset      = 0;
  
-         vs.MaximumNumberofThreads        = device->info.max_vs_threads - 1;
+         vs.MaximumNumberofThreads        = physical_device->max_vs_threads - 1;
           vs.StatisticsEnable              = false;
           vs.SIMD8DispatchEnable           = pipeline->vs_simd8 != NO_KERNEL;
           vs.VertexCacheDisable            = false;
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c

index 5a3e1ab2524a696a2a2b8cbf370be66411ec2fac..3ecc29e953b6f4e29c67819409c7bf6f51e9d579 100644 (file)
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -35,6 +35,8 @@ genX(compute_pipeline_create)(
      VkPipeline*                                 pPipeline)
  {
     ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_physical_device *physical_device =
+      &device->instance->physicalDevice;
     struct anv_pipeline *pipeline;
     VkResult result;
  
@@ -115,7 +117,7 @@ genX(compute_pipeline_create)(
  #else
        vfe.GPGPUMode              = true;
  #endif
-      vfe.MaximumNumberofThreads = device->info.max_cs_threads - 1;
+      vfe.MaximumNumberofThreads = physical_device->max_cs_threads - 1;
        vfe.NumberofURBEntries     = GEN_GEN <= 7 ? 0 : 2;
        vfe.ResetGatewayTimer      = true;
  #if GEN_GEN <= 8
author	Lionel Landwerlin <lionel.g.landwerlin@intel.com>
	Wed, 7 Sep 2016 16:19:35 +0000 (17:19 +0100)
committer	Lionel Landwerlin <lionel.g.landwerlin@intel.com>
	Wed, 21 Sep 2016 09:01:06 +0000 (12:01 +0300)
src/intel/vulkan/anv_allocator.c		patch \| blob \| history
src/intel/vulkan/anv_device.c		patch \| blob \| history
src/intel/vulkan/anv_private.h		patch \| blob \| history
src/intel/vulkan/gen7_pipeline.c		patch \| blob \| history
src/intel/vulkan/gen8_pipeline.c		patch \| blob \| history
src/intel/vulkan/genX_pipeline.c		patch \| blob \| history