This mainly removes the cache misses incurred when checking the per-atom dirty flags.
It has little other effect.
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
boolean count_draw_in)
{
struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
- int i;
/* There are two memory usage counters in the winsys for all buffers
* that have been added (cs_add_reloc) and two counters in the pipe
num_dw += cs->cdw;
if (count_draw_in) {
- for (i = 0; i < SI_NUM_ATOMS; i++) {
- if (ctx->atoms.array[i]->dirty) {
- num_dw += ctx->atoms.array[i]->num_dw;
- }
- }
+ unsigned mask = ctx->dirty_atoms;
+
+ while (mask)
+ num_dw += ctx->atoms.array[u_bit_scan(&mask)]->num_dw;
/* The number of dwords all the dirty states would take. */
num_dw += si_pm4_dirty_dw(ctx);
struct si_shader_selector *fixed_func_tcs_shader;
union si_state_atoms atoms;
+ unsigned dirty_atoms; /* mask */
struct si_framebuffer framebuffer;
struct si_vertex_element *vertex_elements;
si_set_atom_dirty(struct si_context *sctx,
struct r600_atom *atom, bool dirty)
{
- atom->dirty = dirty;
+ unsigned bit = 1 << (atom->id - 1);
+
+ if (dirty)
+ sctx->dirty_atoms |= bit;
+ else
+ sctx->dirty_atoms &= ~bit;
}
static inline void
{
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_index_buffer ib = {};
- unsigned i;
+ unsigned mask;
if (!info->count && !info->indirect &&
(info->indexed || !info->count_from_stream_output))
si_need_cs_space(sctx, 0, TRUE);
/* Emit states. */
- for (i = 0; i < SI_NUM_ATOMS; i++) {
- if (sctx->atoms.array[i]->dirty) {
- sctx->atoms.array[i]->emit(&sctx->b, sctx->atoms.array[i]);
- sctx->atoms.array[i]->dirty = false;
- }
+ mask = sctx->dirty_atoms;
+ while (mask) {
+ struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
+
+ atom->emit(&sctx->b, atom);
}
+ sctx->dirty_atoms = 0;
si_pm4_emit_dirty(sctx);
si_emit_scratch_reloc(sctx);