This gets me from 2200 to 1978 dwords for a gears frame.
This is due to us having some 32-dword blocks in the SPI, of which we only
modify the first few dwords.
v2: fix dirty reg count from Bas Nieuwenhuizen
Signed-off-by: Dave Airlie <airlied@redhat.com>
unsigned pm4_ndwords;
unsigned pm4_flush_ndwords;
unsigned nbo;
- unsigned nreg;
+ u16 nreg;
+ u16 nreg_dirty;
u32 *reg;
u32 pm4[R600_BLOCK_MAX_REG];
unsigned pm4_bo_index[R600_BLOCK_MAX_REG];
r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
}
- r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY);
+ r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY, 7);
}
void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
}
}
- r600_context_dirty_block(ctx, block, dirty);
+ r600_context_dirty_block(ctx, block, dirty, 2);
}
static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset, unsigned id)
}
}
- r600_context_dirty_block(ctx, block, dirty);
+ r600_context_dirty_block(ctx, block, dirty, 4);
}
void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
block->reg = &block->pm4[block->pm4_ndwords];
block->pm4_ndwords += n;
block->nreg = n;
+ block->nreg_dirty = n;
block->flags = 0;
LIST_INITHEAD(&block->list);
unsigned new_val;
int dirty;
for (int i = 0; i < state->nregs; i++) {
- unsigned id;
+ unsigned id, reloc_id;
range = &ctx->range[CTX_RANGE_ID(ctx, state->regs[i].offset)];
block = range->blocks[CTX_BLOCK_ID(ctx, state->regs[i].offset)];
dirty |= R600_BLOCK_STATUS_DIRTY;
if (block->pm4_bo_index[id]) {
/* find relocation */
- id = block->pm4_bo_index[id];
- r600_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo);
+ reloc_id = block->pm4_bo_index[id];
+ r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, state->regs[i].bo);
state->regs[i].bo->fence = ctx->radeon->fence;
/* always force dirty for relocs for now */
dirty |= R600_BLOCK_STATUS_DIRTY;
}
- r600_context_dirty_block(ctx, block, dirty);
+ r600_context_dirty_block(ctx, block, dirty, id);
}
}
state->regs[2].bo->fence = ctx->radeon->fence;
state->regs[3].bo->fence = ctx->radeon->fence;
}
- r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY);
+ r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY, 6);
}
void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
}
}
- r600_context_dirty_block(ctx, block, dirty);
+ r600_context_dirty_block(ctx, block, dirty, 2);
}
static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
}
}
- r600_context_dirty_block(ctx, block, dirty);
+ r600_context_dirty_block(ctx, block, dirty, 3);
}
void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
}
ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords;
ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY;
+ ctx->blocks[i]->nreg_dirty = ctx->blocks[i]->nreg;
}
}
}
#include <os/os_thread.h>
#include "r600.h"
+#define PKT_COUNT_C 0xC000FFFF
+#define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16)
+
struct r600_bomgr;
struct r600_bo;
}
static inline void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block,
- int dirty)
+ int dirty, int index)
{
+ if (dirty && (index + 1) > block->nreg_dirty)
+ block->nreg_dirty = index + 1;
+
if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) {
+
block->status |= R600_BLOCK_STATUS_ENABLED;
block->status |= R600_BLOCK_STATUS_DIRTY;
ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
{
int id;
+ if (block->nreg_dirty == 0 && block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS)) {
+ goto out;
+ }
+
for (int j = 0; j < block->nreg; j++) {
if (block->pm4_bo_index[j]) {
/* find relocation */
}
memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4);
ctx->pm4_cdwords += block->pm4_ndwords;
+
+ if (block->nreg_dirty != block->nreg && block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS)) {
+ int new_dwords = block->nreg_dirty;
+ uint32_t oldword, newword;
+ ctx->pm4_cdwords -= block->pm4_ndwords;
+ newword = oldword = ctx->pm4[ctx->pm4_cdwords];
+ newword &= PKT_COUNT_C;
+ newword |= PKT_COUNT_S(new_dwords);
+ ctx->pm4[ctx->pm4_cdwords] = newword;
+ ctx->pm4_cdwords += new_dwords + 2;
+ }
+out:
block->status ^= R600_BLOCK_STATUS_DIRTY;
+ block->nreg_dirty = 0;
LIST_DELINIT(&block->list);
}