From: Caio Marcelo de Oliveira Filho
Date: Thu, 21 May 2020 06:32:18 +0000 (-0700)
Subject: iris, i965: Update limits for ARB_compute_variable_group_size
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=78e400d4a515e8d8187259ed1287dd4671dee9ca;p=mesa.git

iris, i965: Update limits for ARB_compute_variable_group_size

The CS compiler now produces multiple SIMD variants, so the previous
trade-off between "always using SIMD32" and "having a smaller max
invocations" is now gone.

Reviewed-by: Jason Ekstrand
Part-of:
---

diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 6695932bd0b..438ff3b4f6c 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -435,32 +435,6 @@ iris_get_shader_param(struct pipe_screen *pscreen,
    }
 }
 
-static unsigned
-get_max_threads(const struct gen_device_info *devinfo)
-{
-   /* Limit max_threads to 64 for the GPGPU_WALKER command. */
-   return MIN2(64, devinfo->max_cs_threads);
-}
-
-uint32_t
-iris_get_max_var_invocations(const struct iris_screen *screen)
-{
-   const unsigned max_threads = get_max_threads(&screen->devinfo);
-
-   /* Constants used for ARB_compute_variable_group_size. The compiler will
-    * use the maximum to decide which SIMDs can be used. If we top this like
-    * max_invocations, that would prevent SIMD8 / SIMD16 to be considered.
-    *
-    * TODO: To avoid the trade off above between having the lower maximum
-    * vs. always using SIMD32, keep all three shader variants (for each SIMD)
-    * and select a suitable one at dispatch time.
-    */
-   const uint32_t max_var_invocations =
-      (max_threads >= 64 ? 8 : (max_threads >= 32 ? 16 : 32)) * max_threads;
-   assert(max_var_invocations >= 512);
-   return max_var_invocations;
-}
-
 static int
 iris_get_compute_param(struct pipe_screen *pscreen,
                        enum pipe_shader_ir ir_type,
@@ -468,8 +442,10 @@ iris_get_compute_param(struct pipe_screen *pscreen,
                        void *ret)
 {
    struct iris_screen *screen = (struct iris_screen *)pscreen;
+   const struct gen_device_info *devinfo = &screen->devinfo;
 
-   const unsigned max_threads = get_max_threads(&screen->devinfo);
+   /* Limit max_threads to 64 for the GPGPU_WALKER command. */
+   const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
    const uint32_t max_invocations = 32 * max_threads;
 
 #define RET(x) do { \
@@ -499,6 +475,8 @@ iris_get_compute_param(struct pipe_screen *pscreen,
 
    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
       /* MaxComputeWorkGroupInvocations */
+   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+      /* MaxComputeVariableGroupInvocations */
       RET((uint64_t []) { max_invocations });
 
    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
@@ -511,10 +489,6 @@ iris_get_compute_param(struct pipe_screen *pscreen,
    case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
       RET((uint32_t []) { BRW_SUBGROUP_SIZE });
 
-   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
-      /* MaxComputeVariableGroupInvocations */
-      RET((uint64_t []) { iris_get_max_var_invocations(screen) });
-
    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h
index 87d8a5580d7..bfc5284fc72 100644
--- a/src/gallium/drivers/iris/iris_screen.h
+++ b/src/gallium/drivers/iris/iris_screen.h
@@ -238,6 +238,4 @@ iris_is_format_supported(struct pipe_screen *pscreen,
 
 void iris_disk_cache_init(struct iris_screen *screen);
 
-uint32_t iris_get_max_var_invocations(const struct iris_screen *screen);
-
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 42a0c663b6e..6ca20f24b94 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -836,22 +836,13 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
    ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
    ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
 
-   /* Constants used for ARB_compute_variable_group_size. The compiler will
-    * use the maximum to decide which SIMDs can be used. If we top this like
-    * max_invocations, that would prevent SIMD8 / SIMD16 to be considered.
-    *
-    * TODO: To avoid the trade off above between having the lower maximum
-    * vs. always using SIMD32, keep all three shader variants (for each SIMD)
-    * and select a suitable one at dispatch time.
-    */
+   /* Constants used for ARB_compute_variable_group_size. */
    if (devinfo->gen >= 7) {
-      const uint32_t max_var_invocations =
-         (max_threads >= 64 ? 8 : (max_threads >= 32 ? 16 : 32)) * max_threads;
-      assert(max_var_invocations >= 512);
-      ctx->Const.MaxComputeVariableGroupSize[0] = max_var_invocations;
-      ctx->Const.MaxComputeVariableGroupSize[1] = max_var_invocations;
-      ctx->Const.MaxComputeVariableGroupSize[2] = max_var_invocations;
-      ctx->Const.MaxComputeVariableGroupInvocations = max_var_invocations;
+      assert(max_invocations >= 512);
+      ctx->Const.MaxComputeVariableGroupSize[0] = max_invocations;
+      ctx->Const.MaxComputeVariableGroupSize[1] = max_invocations;
+      ctx->Const.MaxComputeVariableGroupSize[2] = max_invocations;
+      ctx->Const.MaxComputeVariableGroupInvocations = max_invocations;
    }
 }
 