From 368466b7b72aed74b917aeb3225d7a0a7101678c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 14 Oct 2014 16:23:18 -0400 Subject: [PATCH] freedreno/ir3: optimize shader key comparison Signed-off-by: Rob Clark --- .../drivers/freedreno/a3xx/fd3_context.h | 3 + src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 21 ++++--- .../drivers/freedreno/a3xx/fd3_texture.c | 10 +++- .../drivers/freedreno/ir3/ir3_shader.c | 27 +++++---- .../drivers/freedreno/ir3/ir3_shader.h | 58 ++++++++++++------- 5 files changed, 79 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h index 324edb2eb80..77e4605e550 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h @@ -92,6 +92,9 @@ struct fd3_context { struct u_upload_mgr *border_color_uploader; struct pipe_resource *border_color_buf; + /* if *any* of bits are set in {v,f}saturate_{s,t,r} */ + bool vsaturate, fsaturate; + /* bitmask of sampler which needs coords clamped for vertex * shader: */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index ccedb391fed..7cc24e598e2 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -82,18 +82,20 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) struct fd3_context *fd3_ctx = fd3_context(ctx); struct ir3_shader_key *last_key = &fd3_ctx->last_key; - if (memcmp(last_key, key, sizeof(*key))) { + if (!ir3_shader_key_equal(last_key, key)) { ctx->dirty |= FD_DIRTY_PROG; - if ((last_key->vsaturate_s != key->vsaturate_s) || - (last_key->vsaturate_t != key->vsaturate_t) || - (last_key->vsaturate_r != key->vsaturate_r)) - ctx->prog.dirty |= FD_SHADER_DIRTY_VP; + if (last_key->has_per_samp || key->has_per_samp) { + if ((last_key->vsaturate_s != key->vsaturate_s) || + (last_key->vsaturate_t != key->vsaturate_t) || +
(last_key->vsaturate_r != key->vsaturate_r)) + ctx->prog.dirty |= FD_SHADER_DIRTY_VP; - if ((last_key->fsaturate_s != key->fsaturate_s) || - (last_key->fsaturate_t != key->fsaturate_t) || - (last_key->fsaturate_r != key->fsaturate_r)) - ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + if ((last_key->fsaturate_s != key->fsaturate_s) || + (last_key->fsaturate_t != key->fsaturate_t) || + (last_key->fsaturate_r != key->fsaturate_r)) + ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + } if (last_key->color_two_side != key->color_two_side) ctx->prog.dirty |= FD_SHADER_DIRTY_FP; @@ -124,6 +126,7 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info) // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), + .has_per_samp = fd3_ctx->fsaturate || fd3_ctx->vsaturate, .vsaturate_s = fd3_ctx->vsaturate_s, .vsaturate_t = fd3_ctx->vsaturate_t, .vsaturate_r = fd3_ctx->vsaturate_r, diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c index d70b39e2114..39befef7672 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -143,7 +143,7 @@ fd3_sampler_states_bind(struct pipe_context *pctx, { struct fd_context *ctx = fd_context(pctx); struct fd3_context *fd3_ctx = fd3_context(ctx); - unsigned saturate_s = 0, saturate_t = 0, saturate_r = 0; + uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0; unsigned i; for (i = 0; i < nr; i++) { @@ -162,10 +162,18 @@ fd3_sampler_states_bind(struct pipe_context *pctx, fd_sampler_states_bind(pctx, shader, start, nr, hwcso); if (shader == PIPE_SHADER_FRAGMENT) { + fd3_ctx->fsaturate = + (saturate_s != 0) || + (saturate_t != 0) || + (saturate_r != 0); fd3_ctx->fsaturate_s = saturate_s; fd3_ctx->fsaturate_t = saturate_t; fd3_ctx->fsaturate_r = saturate_r; } else if (shader == PIPE_SHADER_VERTEX) { + fd3_ctx->vsaturate 
= + (saturate_s != 0) || + (saturate_t != 0) || + (saturate_r != 0); fd3_ctx->vsaturate_s = saturate_s; fd3_ctx->vsaturate_t = saturate_t; fd3_ctx->vsaturate_r = saturate_r; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index c77cec10cc7..1f7e869d9f3 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -182,23 +182,30 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) * so normalize the key to avoid constructing multiple identical * variants: */ - if (shader->type == SHADER_FRAGMENT) { + switch (shader->type) { + case SHADER_FRAGMENT: + case SHADER_COMPUTE: key.binning_pass = false; - key.vsaturate_s = 0; - key.vsaturate_t = 0; - key.vsaturate_r = 0; - } - if (shader->type == SHADER_VERTEX) { + if (key.has_per_samp) { + key.vsaturate_s = 0; + key.vsaturate_t = 0; + key.vsaturate_r = 0; + } + break; + case SHADER_VERTEX: key.color_two_side = false; key.half_precision = false; key.alpha = false; - key.fsaturate_s = 0; - key.fsaturate_t = 0; - key.fsaturate_r = 0; + if (key.has_per_samp) { + key.fsaturate_s = 0; + key.fsaturate_t = 0; + key.fsaturate_r = 0; + } + break; } for (v = shader->variants; v; v = v->next) - if (!memcmp(&key, &v->key, sizeof(key))) + if (ir3_shader_key_equal(&key, &v->key)) return v; /* compile new variant if it doesn't exist already: */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index c531ad704cc..628c09e1be3 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -54,36 +54,54 @@ static inline uint16_t sem2idx(ir3_semantic sem) * in hw (two sided color), binning-pass vertex shader, etc. */ struct ir3_shader_key { + union { + struct { + /* do we need to check {v,f}saturate_{s,t,r}? 
*/ + unsigned has_per_samp : 1; + + /* + * Vertex shader variant parameters: + */ + unsigned binning_pass : 1; + + /* + * Fragment shader variant parameters: + */ + unsigned color_two_side : 1; + unsigned half_precision : 1; + /* For rendering to alpha, we need a bit of special handling + * since the hw always takes gl_FragColor starting from x + * component, rather than figuring out to take the w component. + * We could be more clever and generate variants for other + * render target formats (ie. luminance formats are xxx1), but + * let's start with this and see how it goes: + */ + unsigned alpha : 1; + }; + uint32_t global; + }; + /* bitmask of sampler which needs coords clamped for vertex * shader: */ - unsigned vsaturate_s, vsaturate_t, vsaturate_r; + uint16_t vsaturate_s, vsaturate_t, vsaturate_r; /* bitmask of sampler which needs coords clamped for frag * shader: */ - unsigned fsaturate_s, fsaturate_t, fsaturate_r; - - /* - * Vertex shader variant parameters: - */ - unsigned binning_pass : 1; + uint16_t fsaturate_s, fsaturate_t, fsaturate_r; - /* - * Fragment shader variant parameters: - */ - unsigned color_two_side : 1; - unsigned half_precision : 1; - /* For rendering to alpha, we need a bit of special handling - * since the hw always takes gl_FragColor starting from x - * component, rather than figuring out to take the w component. - * We could be more clever and generate variants for other - * render target formats (ie. luminance formats are xxx1), but - * let's start with this and see how it goes: - */ - unsigned alpha : 1; }; +static inline bool +ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b) +{ + /* slow-path if we need to check {v,f}saturate_{s,t,r} */ + if (a->has_per_samp || b->has_per_samp) + return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0; + return a->global == b->global; +} + struct ir3_shader_variant { struct fd_bo *bo; -- 2.30.2