From 09394ee6cfe9df2c99373963794c60678da08b39 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 7 Sep 2016 17:19:35 +0100 Subject: [PATCH] anv: device: calculate compute thread numbers using subslices numbers Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke --- src/intel/vulkan/anv_allocator.c | 15 +++++------ src/intel/vulkan/anv_device.c | 43 +++++++++++++++++++++++++++++--- src/intel/vulkan/anv_private.h | 14 +++++++++++ src/intel/vulkan/gen7_pipeline.c | 10 +++++--- src/intel/vulkan/gen8_pipeline.c | 6 +++-- src/intel/vulkan/genX_pipeline.c | 4 ++- 6 files changed, 74 insertions(+), 18 deletions(-) diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index c1687b98979..f694aee92d6 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -924,14 +924,15 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool, if (size == 0) { /* We own the lock. Allocate a buffer */ - struct gen_device_info *devinfo = &device->info; + struct anv_physical_device *physical_device = + &device->instance->physicalDevice; uint32_t max_threads[] = { - [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, - [MESA_SHADER_TESS_CTRL] = devinfo->max_hs_threads, - [MESA_SHADER_TESS_EVAL] = devinfo->max_ds_threads, - [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, - [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, - [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, + [MESA_SHADER_VERTEX] = physical_device->max_vs_threads, + [MESA_SHADER_TESS_CTRL] = physical_device->max_hs_threads, + [MESA_SHADER_TESS_EVAL] = physical_device->max_ds_threads, + [MESA_SHADER_GEOMETRY] = physical_device->max_gs_threads, + [MESA_SHADER_FRAGMENT] = physical_device->max_wm_threads, + [MESA_SHADER_COMPUTE] = physical_device->max_cs_threads, }; size = per_thread_scratch * max_threads[stage]; diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index e66f81252d1..fecb8505b89 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -136,6 +136,41 @@ anv_physical_device_init(struct anv_physical_device *device, bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); + device->max_vs_threads = device->info->max_vs_threads; + device->max_hs_threads = device->info->max_hs_threads; + device->max_ds_threads = device->info->max_ds_threads; + device->max_gs_threads = device->info->max_gs_threads; + device->max_wm_threads = device->info->max_wm_threads; + + /* GENs prior to 8 do not support EU/Subslice info */ + if (device->info->gen >= 8) { + device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL); + device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL); + + /* Without this information, we cannot get the right Braswell + * brandstrings, and we have to use conservative numbers for GPGPU on + * many platforms, but otherwise, things will just work. + */ + if (device->subslice_total < 1 || device->eu_total < 1) { + fprintf(stderr, "WARNING: Kernel 4.1 required to properly" + " query GPU properties.\n"); + } + } else if (device->info->gen == 7) { + device->subslice_total = 1 << (device->info->gt - 1); + } + + if (device->info->is_cherryview && + device->subslice_total > 0 && device->eu_total > 0) { + /* Logical CS threads = EUs per subslice * 7 threads per EU */ + device->max_cs_threads = device->eu_total / device->subslice_total * 7; + + /* Fuse configurations may give more threads than expected, never less. */ + if (device->max_cs_threads < device->info->max_cs_threads) + device->max_cs_threads = device->info->max_cs_threads; + } else { + device->max_cs_threads = device->info->max_cs_threads; + } + close(fd); brw_process_intel_debug_variable(); @@ -503,11 +538,11 @@ void anv_GetPhysicalDeviceProperties( .maxFragmentCombinedOutputResources = 8, .maxComputeSharedMemorySize = 32768, .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, - .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, + .maxComputeWorkGroupInvocations = 16 * pdevice->max_cs_threads, .maxComputeWorkGroupSize = { - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, + 16 * pdevice->max_cs_threads, + 16 * pdevice->max_cs_threads, + 16 * pdevice->max_cs_threads, }, .subPixelPrecisionBits = 4 /* FIXME */, .subTexelPrecisionBits = 4 /* FIXME */, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index a083f93fc64..f578a9d9a85 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -570,6 +570,20 @@ struct anv_physical_device { struct isl_device isl_dev; int cmd_parser_version; + uint32_t eu_total; + uint32_t subslice_total; + + /** + * Platform specific constants containing the maximum number of threads + * for each pipeline stage. + */ + uint32_t max_vs_threads; + uint32_t max_hs_threads; + uint32_t max_ds_threads; + uint32_t max_gs_threads; + uint32_t max_wm_threads; + uint32_t max_cs_threads; + struct anv_wsi_interface * wsi[VK_ICD_WSI_PLATFORM_MAX]; }; diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 48457aa9a33..878308b7fbc 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -45,6 +45,8 @@ genX(graphics_pipeline_create)( { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); + struct anv_physical_device *physical_device = + &device->instance->physicalDevice; struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; struct anv_pipeline *pipeline; VkResult result; @@ -123,7 +125,7 @@ genX(graphics_pipeline_create)( vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length; vs.VertexURBEntryReadOffset = 0; - vs.MaximumNumberofThreads = device->info.max_vs_threads - 1; + vs.MaximumNumberofThreads = physical_device->max_vs_threads - 1; vs.StatisticsEnable = true; vs.VSFunctionEnable = true; } @@ -152,7 +154,7 @@ genX(graphics_pipeline_create)( gs.DispatchGRFStartRegisterforURBData = gs_prog_data->base.base.dispatch_grf_start_reg; - gs.MaximumNumberofThreads = device->info.max_gs_threads - 1; + gs.MaximumNumberofThreads = physical_device->max_gs_threads - 1; /* This in the next dword on HSW. */ gs.ControlDataFormat = gs_prog_data->control_data_format; gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords; @@ -185,7 +187,7 @@ genX(graphics_pipeline_create)( * don't at least set the maximum number of threads. */ anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { - ps.MaximumNumberofThreads = device->info.max_wm_threads - 1; + ps.MaximumNumberofThreads = physical_device->max_wm_threads - 1; } } else { const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); @@ -207,7 +209,7 @@ genX(graphics_pipeline_create)( .offset = 0, }; ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base); - ps.MaximumNumberofThreads = device->info.max_wm_threads - 1; + ps.MaximumNumberofThreads = physical_device->max_wm_threads - 1; ps.PushConstantEnable = wm_prog_data->base.nr_params > 0; ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 445177a84f5..9f16960ccc7 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -55,6 +55,8 @@ genX(graphics_pipeline_create)( { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); + struct anv_physical_device *physical_device = + &device->instance->physicalDevice; struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; struct anv_pipeline *pipeline; VkResult result; @@ -142,7 +144,7 @@ genX(graphics_pipeline_create)( gs.DispatchGRFStartRegisterForURBData = gs_prog_data->base.base.dispatch_grf_start_reg; - gs.MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1; + gs.MaximumNumberofThreads = physical_device->max_gs_threads / 2 - 1; gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords; gs.DispatchMode = gs_prog_data->base.dispatch_mode; gs.StatisticsEnable = true; @@ -213,7 +215,7 @@ genX(graphics_pipeline_create)( vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length; vs.VertexURBEntryReadOffset = 0; - vs.MaximumNumberofThreads = device->info.max_vs_threads - 1; + vs.MaximumNumberofThreads = physical_device->max_vs_threads - 1; vs.StatisticsEnable = false; vs.SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL; vs.VertexCacheDisable = false; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 5a3e1ab2524..3ecc29e953b 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -35,6 +35,8 @@ genX(compute_pipeline_create)( VkPipeline* pPipeline) { ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_physical_device *physical_device = + &device->instance->physicalDevice; struct anv_pipeline *pipeline; VkResult result; @@ -115,7 +117,7 @@ genX(compute_pipeline_create)( #else vfe.GPGPUMode = true; #endif - vfe.MaximumNumberofThreads = device->info.max_cs_threads - 1; + vfe.MaximumNumberofThreads = physical_device->max_cs_threads - 1; vfe.NumberofURBEntries = GEN_GEN <= 7 ? 0 : 2; vfe.ResetGatewayTimer = true; #if GEN_GEN <= 8 -- 2.30.2