From d58600c0e0af98fa2173aaa4dc996ea71502208a Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 31 Dec 2019 21:37:30 -0500 Subject: [PATCH] panfrost: Pack MRT blend shaders into a single BO Blend shader size and location in memory are considerably constrained, probably to facilitate optimizations (my guess is that blend shaders are run strictly out of i-cache). We need to pack the blend shaders for each RT of a single framebuffer together. The easiest way to do this is at draw time, which is not terribly efficient but will hold us over for now. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_blend.h | 6 ++-- src/gallium/drivers/panfrost/pan_blend_cso.c | 29 +++++++++++++------- src/gallium/drivers/panfrost/pan_context.c | 25 +++++++++++------ 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_blend.h b/src/gallium/drivers/panfrost/pan_blend.h index 83fe35e325c..1b1cb9df31d 100644 --- a/src/gallium/drivers/panfrost/pan_blend.h +++ b/src/gallium/drivers/panfrost/pan_blend.h @@ -55,8 +55,8 @@ struct panfrost_blend_shader { /* A blend shader descriptor ready for actual use */ struct panfrost_blend_shader_final { - /* The compiled shader in GPU memory, possibly patched */ - struct panfrost_bo *bo; + /* GPU address where we're compiled to */ + uint64_t gpu; /* First instruction tag (for tagging the pointer) */ unsigned first_tag; @@ -113,6 +113,6 @@ void panfrost_blend_context_init(struct pipe_context *pipe); struct panfrost_blend_final -panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rt); +panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rt, struct panfrost_bo **bo, unsigned *shader_offset); #endif diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c index 1dd211e6b4c..b6c46323020 100644 --- a/src/gallium/drivers/panfrost/pan_blend_cso.c +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c @@ -227,7 
+227,7 @@ panfrost_blend_constant(float *out, float *in, unsigned mask) /* Create a final blend given the context */ struct panfrost_blend_final -panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti) +panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struct panfrost_bo **bo, unsigned *shader_offset) { struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); @@ -273,23 +273,32 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti) final.shader.work_count = shader->work_count; final.shader.first_tag = shader->first_tag; - /* Upload the shader */ - final.shader.bo = panfrost_batch_create_bo(batch, shader->size, - PAN_BO_EXECUTE, - PAN_BO_ACCESS_PRIVATE | - PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_VERTEX_TILER | - PAN_BO_ACCESS_FRAGMENT); - memcpy(final.shader.bo->cpu, shader->buffer, shader->size); + /* Upload the shader, sharing a BO */ + if (!(*bo)) { + *bo = panfrost_batch_create_bo(batch, 4096, + PAN_BO_EXECUTE, + PAN_BO_ACCESS_PRIVATE | + PAN_BO_ACCESS_READ | + PAN_BO_ACCESS_VERTEX_TILER | + PAN_BO_ACCESS_FRAGMENT); + } + + /* Size check */ + assert((*shader_offset + shader->size) < 4096); + + memcpy((*bo)->cpu + *shader_offset, shader->buffer, shader->size); + final.shader.gpu = (*bo)->gpu + *shader_offset; if (shader->patch_index) { /* We have to specialize the blend shader to use constants, so * patch in the current constants */ - float *patch = (float *) (final.shader.bo->cpu + shader->patch_index); + float *patch = (float *) ((*bo)->cpu + *shader_offset + shader->patch_index); memcpy(patch, ctx->blend_color.color, sizeof(float) * 4); } + *shader_offset += shader->size; + return final; } diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 3a8d21d1d8f..8c911b5d001 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -936,9 +936,12 @@ panfrost_emit_for_draw(struct panfrost_context 
*ctx, bool with_vertex_data) unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS]; + unsigned shader_offset = 0; + struct panfrost_bo *shader_bo = NULL; - for (unsigned c = 0; c < rt_count; ++c) - blend[c] = panfrost_get_blend_for_context(ctx, c); + for (unsigned c = 0; c < rt_count; ++c) { + blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, &shader_offset); + } /* If there is a blend shader, work registers are shared. XXX: opt */ @@ -979,13 +982,17 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) /* Even on MFBD, the shader descriptor gets blend shaders. It's * *also* copied to the blend_meta appended (by convention), * but this is the field actually read by the hardware. (Or - * maybe both are read...?) */ + * maybe both are read...?). Specify the last RTi with a blend + * shader. */ - if (blend[0].is_shader) { - ctx->fragment_shader_core.blend.shader = - blend[0].shader.bo->gpu | blend[0].shader.first_tag; - } else { - ctx->fragment_shader_core.blend.shader = 0; + ctx->fragment_shader_core.blend.shader = 0; + + for (signed rt = (rt_count - 1); rt >= 0; --rt) { + if (blend[rt].is_shader) { + ctx->fragment_shader_core.blend.shader = + blend[rt].shader.gpu | blend[rt].shader.first_tag; + break; + } } if (screen->quirks & MIDGARD_SFBD) { @@ -1039,7 +1046,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) assert(!(is_srgb && blend[i].is_shader)); if (blend[i].is_shader) { - rts[i].blend.shader = blend[i].shader.bo->gpu | blend[i].shader.first_tag; + rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag; } else { rts[i].blend.equation = *blend[i].equation.equation; rts[i].blend.constant = blend[i].equation.constant; -- 2.30.2