From b9e02fe138ef181f02fd739129517fbe70604af6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 27 Feb 2019 17:19:53 -0500 Subject: [PATCH] gallium: add pipe_grid_info::last_block The OpenMAX state tracker will use this. RadeonSI is adapted to use pipe_grid_info::last_block instead of its internal state. Acked-by: Leo Liu --- src/gallium/auxiliary/util/u_screen.c | 3 +++ src/gallium/docs/source/screen.rst | 2 ++ src/gallium/drivers/radeonsi/si_compute.c | 2 +- .../drivers/radeonsi/si_compute_blit.c | 18 +++++---------- src/gallium/drivers/radeonsi/si_get.c | 1 + src/gallium/drivers/radeonsi/si_pipe.h | 22 ------------------- src/gallium/include/pipe/p_defines.h | 1 + src/gallium/include/pipe/p_state.h | 21 ++++++++++++++++++ 8 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/gallium/auxiliary/util/u_screen.c b/src/gallium/auxiliary/util/u_screen.c index 50964f3b3ef..b902c083ad4 100644 --- a/src/gallium/auxiliary/util/u_screen.c +++ b/src/gallium/auxiliary/util/u_screen.c @@ -341,6 +341,9 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_MAX_VARYINGS: return 8; + case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK: + return 0; + default: unreachable("bad PIPE_CAP_*"); } diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 85ca5e1f5ce..60ba9bcbde0 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -492,6 +492,8 @@ The integer capabilities: varyings. This will generally correspond to ``PIPE_SHADER_CAP_MAX_INPUTS`` for the fragment shader, but in some cases may be a smaller number. +* ``PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK``: Whether pipe_grid_info::last_block + is implemented by the driver. See struct pipe_grid_info for more details. .. _pipe_capf: diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 87addd53976..6c2269d903a 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -804,7 +804,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, * allow launching waves out-of-order. (same as Vulkan) */ S_00B800_ORDER_MODE(sctx->chip_class >= CIK); - uint *last_block = sctx->compute_last_block; + uint *last_block = info->last_block; bool partial_block_en = last_block[0] || last_block[1] || last_block[2]; radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index f5e9c02dd10..a7453099ac6 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -381,7 +381,7 @@ void si_compute_copy_image(struct si_context *sctx, si_create_copy_image_compute_shader_1d_array(ctx); ctx->bind_compute_state(ctx, sctx->cs_copy_image_1d_array); info.block[0] = 64; - sctx->compute_last_block[0] = width % 64; + info.last_block[0] = width % 64; info.block[1] = 1; info.block[2] = 1; info.grid[0] = DIV_ROUND_UP(width, 64); @@ -392,9 +392,9 @@ void si_compute_copy_image(struct si_context *sctx, sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx); ctx->bind_compute_state(ctx, sctx->cs_copy_image); info.block[0] = 8; - sctx->compute_last_block[0] = width % 8; + info.last_block[0] = width % 8; info.block[1] = 8; - sctx->compute_last_block[1] = height % 8; + info.last_block[1] = height % 8; info.block[2] = 1; info.grid[0] = DIV_ROUND_UP(width, 8); info.grid[1] = DIV_ROUND_UP(height, 8); @@ -403,9 +403,6 @@ void si_compute_copy_image(struct si_context *sctx, ctx->launch_grid(ctx, &info); - sctx->compute_last_block[0] = 0; - sctx->compute_last_block[1] = 0; - sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); @@ -483,9 +480,9 @@ void si_compute_clear_render_target(struct pipe_context *ctx, sctx->cs_clear_render_target = si_clear_render_target_shader(ctx); ctx->bind_compute_state(ctx, sctx->cs_clear_render_target); info.block[0] = 8; - sctx->compute_last_block[0] = width % 8; + info.last_block[0] = width % 8; info.block[1] = 8; - sctx->compute_last_block[1] = height % 8; + info.last_block[1] = height % 8; info.block[2] = 1; info.grid[0] = DIV_ROUND_UP(width, 8); info.grid[1] = DIV_ROUND_UP(height, 8); @@ -496,7 +493,7 @@ void si_compute_clear_render_target(struct pipe_context *ctx, si_clear_render_target_shader_1d_array(ctx); ctx->bind_compute_state(ctx, sctx->cs_clear_render_target_1d_array); info.block[0] = 64; - sctx->compute_last_block[0] = width % 64; + info.last_block[0] = width % 64; info.block[1] = 1; info.block[2] = 1; info.grid[0] = DIV_ROUND_UP(width, 64); @@ -506,9 +503,6 @@ void si_compute_clear_render_target(struct pipe_context *ctx, ctx->launch_grid(ctx, &info); - sctx->compute_last_block[0] = 0; - sctx->compute_last_block[1] = 0; - sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index a5cb209b59e..6fa67087c7d 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -160,6 +160,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_BALLOT: case PIPE_CAP_TGSI_VOTE: case PIPE_CAP_TGSI_FS_FBFETCH: + case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b3198d45ea6..b6858b46ec0 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -921,28 +921,6 @@ struct si_context { uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD]; uint32_t cs_user_data[4]; - /** - * last_block allows disabling threads at the farthermost grid boundary. - * Full blocks as specified by "block" are launched, but the threads - * outside of "last_block" dimensions are disabled. - * - * If a block touches the grid boundary in the i-th axis, threads with - * THREAD_ID[i] >= last_block[i] are disabled. - * - * If last_block[i] is 0, it has the same behavior as last_block[i] = block[i], - * meaning no effect. - * - * It's equivalent to doing this at the beginning of the compute shader: - * - * for (i = 0; i < 3; i++) { - * if (block_id[i] == grid[i] - 1 && - * last_block[i] && last_block[i] >= thread_id[i]) - * return; - * } - * (this could be moved into pipe_grid_info) - */ - uint compute_last_block[3]; - /* Vertex and index buffers. */ bool vertex_buffers_dirty; bool vertex_buffer_pointer_dirty; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index e2b0104ce43..d4732dc257f 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -858,6 +858,7 @@ enum pipe_cap PIPE_CAP_DEST_SURFACE_SRGB_CONTROL, PIPE_CAP_NIR_COMPACT_ARRAYS, PIPE_CAP_MAX_VARYINGS, + PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK, }; /** diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 38052e5fd3d..3a91ddd71b5 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -838,6 +838,27 @@ struct pipe_grid_info */ uint block[3]; + /** + * last_block allows disabling threads at the farthermost grid boundary. + * Full blocks as specified by "block" are launched, but the threads + * outside of "last_block" dimensions are disabled. + * + * If a block touches the grid boundary in the i-th axis, threads with + * THREAD_ID[i] >= last_block[i] are disabled. + * + * If last_block[i] is 0, it has the same behavior as last_block[i] = block[i], + * meaning no effect. + * + * It's equivalent to doing this at the beginning of the compute shader: + * + * for (i = 0; i < 3; i++) { + * if (block_id[i] == grid[i] - 1 && + * last_block[i] && thread_id[i] >= last_block[i]) + * return; + * } + */ + uint last_block[3]; + /** * Determine the layout of the grid (in block units) to be used. */ -- 2.30.2