From: Dave Airlie Date: Mon, 18 Apr 2011 03:03:06 +0000 (+1000) Subject: r600g: modify block to only emit the first few dirty registers. (v2) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=162bc40eed9dc54091b6e64bcd7eab06607b500d;p=mesa.git r600g: modify block to only emit the first few dirty registers. (v2) This gets me from 2200 to 1978 dwords for a gears frame. This is due to us having some 32-dword blocks in the SPI, of which we only modify the first few dwords. v2: fix dirty reg count from Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index d6050002033..41666f2bff2 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -184,7 +184,8 @@ struct r600_block { unsigned pm4_ndwords; unsigned pm4_flush_ndwords; unsigned nbo; - unsigned nreg; + u16 nreg; + u16 nreg_dirty; u32 *reg; u32 pm4[R600_BLOCK_MAX_REG]; unsigned pm4_bo_index[R600_BLOCK_MAX_REG]; diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index b287ed54c29..670cbf7804e 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -664,7 +664,7 @@ static inline void evergreen_context_pipe_state_set_resource(struct r600_context r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo); } - r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY); + r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY, 7); } void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) @@ -711,7 +711,7 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context } } - r600_context_dirty_block(ctx, block, dirty); + r600_context_dirty_block(ctx, block, dirty, 2); } static inline void 
evergreen_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset, unsigned id) @@ -745,7 +745,7 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c } } - r600_context_dirty_block(ctx, block, dirty); + r600_context_dirty_block(ctx, block, dirty, 4); } void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 2d9e40303e0..52abf061cce 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -113,6 +113,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, block->reg = &block->pm4[block->pm4_ndwords]; block->pm4_ndwords += n; block->nreg = n; + block->nreg_dirty = n; block->flags = 0; LIST_INITHEAD(&block->list); @@ -844,7 +845,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat unsigned new_val; int dirty; for (int i = 0; i < state->nregs; i++) { - unsigned id; + unsigned id, reloc_id; range = &ctx->range[CTX_RANGE_ID(ctx, state->regs[i].offset)]; block = range->blocks[CTX_BLOCK_ID(ctx, state->regs[i].offset)]; @@ -863,14 +864,14 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat dirty |= R600_BLOCK_STATUS_DIRTY; if (block->pm4_bo_index[id]) { /* find relocation */ - id = block->pm4_bo_index[id]; - r600_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo); + reloc_id = block->pm4_bo_index[id]; + r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, state->regs[i].bo); state->regs[i].bo->fence = ctx->radeon->fence; /* always force dirty for relocs for now */ dirty |= R600_BLOCK_STATUS_DIRTY; } - r600_context_dirty_block(ctx, block, dirty); + r600_context_dirty_block(ctx, block, dirty, id); } } @@ -909,7 +910,7 @@ static inline void 
r600_context_pipe_state_set_resource(struct r600_context *ctx state->regs[2].bo->fence = ctx->radeon->fence; state->regs[3].bo->fence = ctx->radeon->fence; } - r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY); + r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY, 6); } void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) @@ -955,7 +956,7 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, } } - r600_context_dirty_block(ctx, block, dirty); + r600_context_dirty_block(ctx, block, dirty, 2); } static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) @@ -983,7 +984,7 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex } } - r600_context_dirty_block(ctx, block, dirty); + r600_context_dirty_block(ctx, block, dirty, 3); } void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) @@ -1211,6 +1212,7 @@ void r600_context_flush(struct r600_context *ctx) } ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords; ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY; + ctx->blocks[i]->nreg_dirty = ctx->blocks[i]->nreg; } } } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 534df11f185..faf47e82ab8 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -36,6 +36,9 @@ #include #include "r600.h" +#define PKT_COUNT_C 0xC000FFFF +#define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) + struct r600_bomgr; struct r600_bo; @@ -198,9 +201,13 @@ static void inline r600_context_reg(struct r600_context *ctx, } static inline void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block, - int dirty) + int dirty, int index) { + if (dirty && (index + 1) > block->nreg_dirty) + 
block->nreg_dirty = index + 1; + if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { + block->status |= R600_BLOCK_STATUS_ENABLED; block->status |= R600_BLOCK_STATUS_DIRTY; ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; @@ -212,6 +219,10 @@ static inline void r600_context_block_emit_dirty(struct r600_context *ctx, struc { int id; + if (block->nreg_dirty == 0 && block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS)) { + goto out; + } + for (int j = 0; j < block->nreg; j++) { if (block->pm4_bo_index[j]) { /* find relocation */ @@ -227,7 +238,20 @@ static inline void r600_context_block_emit_dirty(struct r600_context *ctx, struc } memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4); ctx->pm4_cdwords += block->pm4_ndwords; + + if (block->nreg_dirty != block->nreg && block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS)) { + int new_dwords = block->nreg_dirty; + uint32_t oldword, newword; + ctx->pm4_cdwords -= block->pm4_ndwords; + newword = oldword = ctx->pm4[ctx->pm4_cdwords]; + newword &= PKT_COUNT_C; + newword |= PKT_COUNT_S(new_dwords); + ctx->pm4[ctx->pm4_cdwords] = newword; + ctx->pm4_cdwords += new_dwords + 2; + } +out: block->status ^= R600_BLOCK_STATUS_DIRTY; + block->nreg_dirty = 0; LIST_DELINIT(&block->list); }