From: Sonny Jiang
Date: Mon, 21 Jan 2019 23:16:40 +0000 (-0500)
Subject: radeonsi: use compute for clear_render_target when possible
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=984fd73515270fbc9c934ccad63a9d6a958b4b04;p=mesa.git

radeonsi: use compute for clear_render_target when possible

Signed-off-by: Sonny Jiang
Signed-off-by: Marek Olšák
---

diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c
index 9026f61dc0a..cac979733ed 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -680,6 +680,12 @@ static void si_clear_render_target(struct pipe_context *ctx,
 				   bool render_condition_enabled)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
+	struct si_texture *sdst = (struct si_texture*)dst->texture;
+
+	if (dst->texture->nr_samples <= 1 && !sdst->dcc_offset) {
+		si_compute_clear_render_target(ctx, dst, color, dstx, dsty, width, height);
+		return;
+	}
 
 	si_blitter_begin(sctx, SI_CLEAR_SURFACE |
 			 (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c
index 38c48c30be9..f06497f4dac 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -25,6 +25,7 @@
 
 #include "si_pipe.h"
 #include "util/u_format.h"
+#include "util/format_srgb.h"
 
 /* Note: Compute shaders always use SI_COMPUTE_DST_CACHE_POLICY for dst
  * and L2_STREAM for src.
@@ -425,3 +426,127 @@ void si_init_compute_blit_functions(struct si_context *sctx)
 {
 	sctx->b.clear_buffer = si_pipe_clear_buffer;
 }
+
+/**
+ * Clear a rectangular region of a color surface to a constant value using a
+ * compute shader.
+ *
+ * \param ctx      context the clear is recorded on
+ * \param dstsurf  surface to clear; its level and layer range select the target
+ * \param color    clear color; for sRGB formats the RGB channels are converted
+ *                 from linear to sRGB here because the image is bound with the
+ *                 linear variant of the surface format
+ * \param dstx     X offset of the region
+ * \param dsty     Y offset of the region
+ * \param width    width of the region; the call is a no-op when 0
+ * \param height   height of the region; the call is a no-op when 0
+ *
+ * Compute constant buffer 0, compute image 0 and the bound compute shader are
+ * saved before the dispatch and restored afterwards.
+ */
+void si_compute_clear_render_target(struct pipe_context *ctx,
+                                    struct pipe_surface *dstsurf,
+                                    const union pipe_color_union *color,
+                                    unsigned dstx, unsigned dsty,
+                                    unsigned width, unsigned height)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	unsigned num_layers = dstsurf->u.tex.last_layer - dstsurf->u.tex.first_layer + 1;
+
+	/* Constant buffer contents, read by the shaders as CONST[0][0..1]:
+	 * dwords 0-3 = {dstx, dsty, first_layer, 0}, dwords 4-7 = clear color.
+	 */
+	unsigned data[4 + ARRAY_SIZE(color->ui)] = {dstx, dsty, dstsurf->u.tex.first_layer, 0};
+
+	if (width == 0 || height == 0)
+		return;
+
+	if (util_format_is_srgb(dstsurf->format)) {
+		union pipe_color_union color_srgb;
+		for (int i = 0; i < 3; i++)
+			color_srgb.f[i] = util_format_linear_to_srgb_float(color->f[i]);
+		color_srgb.f[3] = color->f[3];
+		memcpy(data + 4, color_srgb.ui, sizeof(color->ui));
+	} else {
+		memcpy(data + 4, color->ui, sizeof(color->ui));
+	}
+
+	si_compute_internal_begin(sctx);
+	sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
+		       si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
+	si_make_CB_shader_coherent(sctx, dstsurf->texture->nr_samples, true);
+
+	/* Save the compute state that is overridden below. */
+	struct pipe_constant_buffer saved_cb = {0};
+	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
+
+	struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE];
+	struct pipe_image_view saved_image = {0};
+	util_copy_image_view(&saved_image, &images->views[0]);
+
+	void *saved_cs = sctx->cs_shader_state.program;
+
+	struct pipe_constant_buffer cb = {0};
+	cb.buffer_size = sizeof(data);
+	cb.user_buffer = data;
+	ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &cb);
+
+	struct pipe_image_view image = {0};
+	image.resource = dstsurf->texture;
+	image.shader_access = image.access = PIPE_IMAGE_ACCESS_WRITE;
+	image.format = util_format_linear(dstsurf->format);
+	image.u.tex.level = dstsurf->u.tex.level;
+	image.u.tex.first_layer = 0; /* 3D images ignore first_layer (BASE_ARRAY) */
+	image.u.tex.last_layer = dstsurf->u.tex.last_layer;
+
+	ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &image);
+
+	struct pipe_grid_info info = {0};
+
+	if (dstsurf->texture->target != PIPE_TEXTURE_1D_ARRAY) {
+		/* 8x8 threads per block; grid Z enumerates the layers. */
+		if (!sctx->cs_clear_render_target)
+			sctx->cs_clear_render_target = si_clear_render_target_shader(ctx);
+		ctx->bind_compute_state(ctx, sctx->cs_clear_render_target);
+		info.block[0] = 8;
+		sctx->compute_last_block[0] = width % 8;
+		info.block[1] = 8;
+		sctx->compute_last_block[1] = height % 8;
+		info.block[2] = 1;
+		info.grid[0] = DIV_ROUND_UP(width, 8);
+		info.grid[1] = DIV_ROUND_UP(height, 8);
+		info.grid[2] = num_layers;
+	} else {
+		/* 64x1 threads per block; grid Y enumerates the layers. */
+		if (!sctx->cs_clear_render_target_1d_array)
+			sctx->cs_clear_render_target_1d_array =
+				si_clear_render_target_shader_1d_array(ctx);
+		ctx->bind_compute_state(ctx, sctx->cs_clear_render_target_1d_array);
+		info.block[0] = 64;
+		sctx->compute_last_block[0] = width % 64;
+		info.block[1] = 1;
+		info.block[2] = 1;
+		info.grid[0] = DIV_ROUND_UP(width, 64);
+		info.grid[1] = num_layers;
+		info.grid[2] = 1;
+	}
+
+	ctx->launch_grid(ctx, &info);
+
+	sctx->compute_last_block[0] = 0;
+	sctx->compute_last_block[1] = 0;
+
+	sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
+		       (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
+		       si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
+
+	/* Restore the saved compute state. */
+	ctx->bind_compute_state(ctx, saved_cs);
+	ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image);
+	ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
+	si_compute_internal_end(sctx);
+
+	/* The saved copies hold their own resource references; release them. */
+	pipe_resource_reference(&saved_image.resource, NULL);
+	pipe_resource_reference(&saved_cb.buffer, NULL);
+}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 41d395d7d3f..439b550c4cf 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -205,6 +205,10 @@ static void si_destroy_context(struct pipe_context *context)
 		sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image);
 	if (sctx->cs_copy_image_1d_array)
 		sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_1d_array);
+	if (sctx->cs_clear_render_target)
+		sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target);
+	if (sctx->cs_clear_render_target_1d_array)
+		sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target_1d_array);
 
 	if (sctx->blitter)
 		util_blitter_destroy(sctx->blitter);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index eb3ba951dae..4f2845854c3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -802,6 +802,8 @@ struct si_context {
 	void				*cs_copy_buffer;
 	void				*cs_copy_image;
 	void				*cs_copy_image_1d_array;
+	void				*cs_clear_render_target;
+	void				*cs_clear_render_target_1d_array;
 	struct si_screen		*screen;
 	struct pipe_debug_callback	debug;
 	struct ac_llvm_compiler		compiler; /* only non-threaded compilation */
@@ -1179,6 +1181,11 @@ void si_compute_copy_image(struct si_context *sctx,
 			   unsigned src_level,
 			   unsigned dstx, unsigned dsty, unsigned dstz,
 			   const struct pipe_box *src_box);
+void si_compute_clear_render_target(struct pipe_context *ctx,
+                                    struct pipe_surface *dstsurf,
+                                    const union pipe_color_union *color,
+                                    unsigned dstx, unsigned dsty,
+                                    unsigned width, unsigned height);
 void
si_init_compute_blit_functions(struct si_context *sctx);
 
 /* si_cp_dma.c */
@@ -1294,6 +1301,8 @@ void *si_create_dma_compute_shader(struct pipe_context *ctx,
 				   bool dst_stream_cache_policy, bool is_copy);
 void *si_create_copy_image_compute_shader(struct pipe_context *ctx);
 void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx);
+void *si_clear_render_target_shader(struct pipe_context *ctx);
+void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx);
 void *si_create_query_result_cs(struct si_context *sctx);
 
 /* si_test_dma.c */
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
index 55f96b3a25e..91a23b1d7ed 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
@@ -516,3 +516,94 @@ void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx)
 
 	return ctx->create_compute_state(ctx, &state);
 }
+
+/**
+ * Create the compute shader that clears render targets other than 1D arrays.
+ *
+ * The TGSI program uses 8x8x1 blocks. Each thread stores the vec4 held in
+ * CONST[0][1] into IMAGE[0] at BLOCK_ID * {8, 8, 1} + THREAD_ID + CONST[0][0].xyz.
+ *
+ * \return the result of ctx->create_compute_state(), or NULL if the TGSI text
+ *         cannot be translated
+ */
+void *si_clear_render_target_shader(struct pipe_context *ctx)
+{
+	static const char shader_text[] =
+		"COMP\n"
+		"PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
+		"PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
+		"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
+		"DCL SV[0], THREAD_ID\n"
+		"DCL SV[1], BLOCK_ID\n"
+		"DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
+		"DCL CONST[0][0..1]\n" /* [0] = offset, [1] = value to store */
+		"DCL TEMP[0..3], LOCAL\n"
+		"IMM[0] UINT32 {8, 1, 0, 0}\n"
+		"MOV TEMP[0].xyz, CONST[0][0].xyzw\n"
+		"UMAD TEMP[1].xyz, SV[1].xyzz, IMM[0].xxyy, SV[0].xyzz\n"
+		"UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n"
+		"MOV TEMP[3].xyzw, CONST[0][1].xyzw\n"
+		"STORE IMAGE[0], TEMP[2].xyzz, TEMP[3], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
+		"END\n";
+
+	struct tgsi_token tgsi_tokens[1024];
+
+	if (!tgsi_text_translate(shader_text, tgsi_tokens, ARRAY_SIZE(tgsi_tokens))) {
+		assert(false);
+		return NULL;
+	}
+
+	struct pipe_compute_state cs_state = {
+		.ir_type = PIPE_SHADER_IR_TGSI,
+		.prog = tgsi_tokens,
+	};
+
+	return ctx->create_compute_state(ctx, &cs_state);
+}
+
+/**
+ * Create the compute shader that clears 1D array render targets.
+ *
+ * The TGSI program uses 64x1x1 blocks. Each thread stores the vec4 held in
+ * CONST[0][1] into IMAGE[0] at BLOCK_ID.xy * {64, 1} + THREAD_ID.xy +
+ * CONST[0][0].xz.
+ *
+ * TODO: Didn't really test 1D_ARRAY
+ *
+ * \return the result of ctx->create_compute_state(), or NULL if the TGSI text
+ *         cannot be translated
+ */
+void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx)
+{
+	static const char shader_text[] =
+		"COMP\n"
+		"PROPERTY CS_FIXED_BLOCK_WIDTH 64\n"
+		"PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
+		"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
+		"DCL SV[0], THREAD_ID\n"
+		"DCL SV[1], BLOCK_ID\n"
+		"DCL IMAGE[0], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
+		"DCL CONST[0][0..1]\n" /* [0] = offset, [1] = value to store */
+		"DCL TEMP[0..3], LOCAL\n"
+		"IMM[0] UINT32 {64, 1, 0, 0}\n"
+		"MOV TEMP[0].xy, CONST[0][0].xzzw\n"
+		"UMAD TEMP[1].xy, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz\n"
+		"UADD TEMP[2].xy, TEMP[1].xyzx, TEMP[0].xyzx\n"
+		"MOV TEMP[3].xyzw, CONST[0][1].xyzw\n"
+		"STORE IMAGE[0], TEMP[2].xyzz, TEMP[3], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
+		"END\n";
+
+	struct tgsi_token tgsi_tokens[1024];
+
+	if (!tgsi_text_translate(shader_text, tgsi_tokens, ARRAY_SIZE(tgsi_tokens))) {
+		assert(false);
+		return NULL;
+	}
+
+	struct pipe_compute_state cs_state = {
+		.ir_type = PIPE_SHADER_IR_TGSI,
+		.prog = tgsi_tokens,
+	};
+
+	return ctx->create_compute_state(ctx, &cs_state);
+}