From: Rob Clark Date: Thu, 25 Apr 2013 15:17:02 +0000 (-0400) Subject: freedreno: don't patch and re-emit same shader as much X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f706d4d340f0778de23062ef13c54b07bfac7967;p=mesa.git freedreno: don't patch and re-emit same shader as much New textures or vertex buffers don't always require patching and re-emitting the shaders. So do a better job of figuring out when we actually have to patch the shader. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 4ed31593d41..c3a85b5c7ab 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -144,23 +144,24 @@ struct fd_context { /* which state objects need to be re-emit'd: */ enum { - FD_DIRTY_BLEND = (1 << 0), - FD_DIRTY_RASTERIZER = (1 << 1), - FD_DIRTY_ZSA = (1 << 2), - FD_DIRTY_FRAGTEX = (1 << 3), - FD_DIRTY_VERTTEX = (1 << 4), - FD_DIRTY_PROG = (1 << 5), - FD_DIRTY_VTX = (1 << 6), - FD_DIRTY_BLEND_COLOR = (1 << 7), - FD_DIRTY_STENCIL_REF = (1 << 8), - FD_DIRTY_SAMPLE_MASK = (1 << 9), + FD_DIRTY_BLEND = (1 << 0), + FD_DIRTY_RASTERIZER = (1 << 1), + FD_DIRTY_ZSA = (1 << 2), + FD_DIRTY_FRAGTEX = (1 << 3), + FD_DIRTY_VERTTEX = (1 << 4), + FD_DIRTY_TEXSTATE = (1 << 5), + FD_DIRTY_PROG = (1 << 6), + FD_DIRTY_BLEND_COLOR = (1 << 7), + FD_DIRTY_STENCIL_REF = (1 << 8), + FD_DIRTY_SAMPLE_MASK = (1 << 9), FD_DIRTY_FRAMEBUFFER = (1 << 10), - FD_DIRTY_STIPPLE = (1 << 12), + FD_DIRTY_STIPPLE = (1 << 11), FD_DIRTY_VIEWPORT = (1 << 12), FD_DIRTY_CONSTBUF = (1 << 13), - FD_DIRTY_VERTEXBUF = (1 << 14), - FD_DIRTY_INDEXBUF = (1 << 15), - FD_DIRTY_SCISSOR = (1 << 16), + FD_DIRTY_VTXSTATE = (1 << 14), + FD_DIRTY_VTXBUF = (1 << 15), + FD_DIRTY_INDEXBUF = (1 << 16), + FD_DIRTY_SCISSOR = (1 << 17), } dirty; struct fd_blend_stateobj *blend; diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c index 3857a1cfe17..7605e82d877 100644 --- a/src/gallium/drivers/freedreno/freedreno_program.c +++ b/src/gallium/drivers/freedreno/freedreno_program.c @@ -275,11 +275,11 @@ fd_program_validate(struct fd_context *ctx) prog->dirty = 0; /* if necessary, fix up vertex fetch instructions: */ - if (ctx->dirty & (FD_DIRTY_VTX | FD_DIRTY_VERTEXBUF | FD_DIRTY_PROG)) + if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG)) patch_vtx_fetches(ctx, prog->vp, ctx->vtx); /* if necessary, fix up texture fetch instructions: */ - if (ctx->dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX | FD_DIRTY_PROG)) { + if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) { patch_tex_fetches(ctx, prog->vp, &ctx->verttex); patch_tex_fetches(ctx, prog->fp, &ctx->fragtex); } @@ -400,9 +400,7 @@ create_blit_vp(void) ir2_reg_create(instr, 1, NULL, 0); ir2_reg_create(instr, 1, NULL, 0); - return assemble(so); - } /* Creates shader: diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index 064a2faf787..dcac6244a33 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -172,11 +172,27 @@ fd_set_vertex_buffers(struct pipe_context *pctx, { struct fd_context *ctx = fd_context(pctx); struct fd_vertexbuf_stateobj *so = &ctx->vertexbuf; + int i; + + /* on a2xx, pitch is encoded in the vtx fetch instruction, so + * we need to mark VTXSTATE as dirty as well to trigger patching + * and re-emitting the vtx shader: + */ + for (i = 0; i < count; i++) { + bool new_enabled = vb && (vb[i].buffer || vb[i].user_buffer); + bool old_enabled = so->vb[i].buffer || so->vb[i].user_buffer; + uint32_t new_stride = vb ? vb[i].stride : 0; + uint32_t old_stride = so->vb[i].stride; + if ((new_enabled != old_enabled) || (new_stride != old_stride)) { + ctx->dirty |= FD_DIRTY_VTXSTATE; + break; + } + } util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, count); so->count = util_last_bit(so->enabled_mask); - ctx->dirty |= FD_DIRTY_VERTEXBUF; + ctx->dirty |= FD_DIRTY_VTXBUF; } static void @@ -444,7 +460,7 @@ fd_state_emit(struct pipe_context *pctx, uint32_t dirty) A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA); } - if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTX | FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) { + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) { fd_program_validate(ctx); fd_program_emit(ring, &ctx->prog); } diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c index 9584dd3f3ea..3ea51ce1e5e 100644 --- a/src/gallium/drivers/freedreno/freedreno_texture.c +++ b/src/gallium/drivers/freedreno/freedreno_texture.c @@ -185,6 +185,14 @@ fd_fragtex_sampler_states_bind(struct pipe_context *pctx, unsigned nr, void **hwcso) { struct fd_context *ctx = fd_context(pctx); + + /* on a2xx, since there is a flat address space for textures/samplers, + * a change in # of fragment textures/samplers will trigger patching and + * re-emitting the vertex shader: + */ + if (nr != ctx->fragtex.num_samplers) + ctx->dirty |= FD_DIRTY_TEXSTATE; + bind_sampler_states(&ctx->fragtex, nr, hwcso); ctx->dirty |= FD_DIRTY_FRAGTEX; } @@ -195,6 +203,14 @@ fd_fragtex_set_sampler_views(struct pipe_context *pctx, unsigned nr, struct pipe_sampler_view **views) { struct fd_context *ctx = fd_context(pctx); + + /* on a2xx, since there is a flat address space for textures/samplers, + * a change in # of fragment textures/samplers will trigger patching and + * re-emitting the vertex shader: + */ + if (nr != ctx->fragtex.num_textures) + ctx->dirty |= FD_DIRTY_TEXSTATE; + set_sampler_views(&ctx->fragtex, nr, views); ctx->dirty |= FD_DIRTY_FRAGTEX; } @@ -218,55 +234,24 @@ fd_verttex_set_sampler_views(struct pipe_context *pctx, unsigned nr, ctx->dirty |= FD_DIRTY_VERTTEX; } -static bool -tex_cmp(struct fd_texture_stateobj *tex1, unsigned samp_id1, - struct fd_texture_stateobj *tex2, unsigned samp_id2) -{ - if ((samp_id1 >= tex1->num_samplers) || - (samp_id2 >= tex2->num_samplers)) - return false; - - if ((tex1 == tex2) && (samp_id1 == samp_id2)) - return true; - - if (tex1->textures[samp_id1]->texture != tex2->textures[samp_id2]->texture) - return false; - - if (memcmp(&tex1->samplers[samp_id1]->base, &tex2->samplers[samp_id2]->base, - sizeof(tex1->samplers[samp_id1]->base))) - return false; - - return true; -} - /* map gallium sampler-id to hw const-idx.. adreno uses a flat address * space of samplers (const-idx), so we need to map the gallium sampler-id * which is per-shader to a global const-idx space. + * + * Fragment shader sampler maps directly to const-idx, and vertex shader + * is offset by the # of fragment shader samplers. If the # of fragment + * shader samplers changes, this shifts the vertex shader indexes. + * + * TODO maybe we can do frag shader 0..N and vert shader N..0 to avoid + * this?? */ unsigned fd_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex, unsigned samp_id) { - unsigned i, const_idx = 0; - - /* TODO maybe worth having some sort of cache, because we need to - * do this loop thru all the samplers both when patching shaders - * and also when emitting sampler state.. - */ - - for (i = 0; i < ctx->verttex.num_samplers; i++) { - if (tex_cmp(&ctx->verttex, i, tex, samp_id)) - return const_idx; - const_idx++; - } - - for (i = 0; i < ctx->fragtex.num_samplers; i++) { - if (tex_cmp(&ctx->fragtex, i, tex, samp_id)) - return const_idx; - const_idx++; - } - - return const_idx; + if (tex == &ctx->fragtex) + return samp_id; + return samp_id + ctx->fragtex.num_samplers; } void diff --git a/src/gallium/drivers/freedreno/freedreno_vbo.c b/src/gallium/drivers/freedreno/freedreno_vbo.c index 763171dc243..d3a6386ee4b 100644 --- a/src/gallium/drivers/freedreno/freedreno_vbo.c +++ b/src/gallium/drivers/freedreno/freedreno_vbo.c @@ -65,7 +65,7 @@ fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso) { struct fd_context *ctx = fd_context(pctx); ctx->vtx = hwcso; - ctx->dirty |= FD_DIRTY_VTX; + ctx->dirty |= FD_DIRTY_VTXSTATE; } static void @@ -193,9 +193,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) /* and any buffers used, need to be resolved: */ ctx->resolve |= buffers; - fd_state_emit(pctx, ctx->dirty); + if (ctx->dirty & FD_DIRTY_VTXBUF) + emit_vertexbufs(ctx); - emit_vertexbufs(ctx, info->count); + fd_state_emit(pctx, ctx->dirty); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));