From: Grazvydas Ignotas Date: Sun, 9 Aug 2015 21:42:35 +0000 (+0300) Subject: r600g: use a bitfield to track dirty atoms X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=50545882113b389decc3f05771764f6c62213af3;p=mesa.git r600g: use a bitfield to track dirty atoms r600 currently has 73 atoms and looping through their dirty flags has become costly because checking each flag requires a pointer dereference before the read. To avoid having to do that add additional bitfield which can be checked really quickly thanks to tzcnt instruction. id field was added to struct r600_atom but that doesn't affect memory usage for both 32 and 64 bit CPUs because it was stuffed into padding. The performance improvement is ~2% for benchmarks that can have FPS in the thousands but is hardly measurable in "real" programs. Signed-off-by: Marek Olšák --- diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index c048a713099..64451516c23 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -51,13 +51,13 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, unsigned i; /* The number of dwords all the dirty states would take. */ - for (i = 0; i < R600_NUM_ATOMS; i++) { - if (ctx->atoms[i] && ctx->atoms[i]->dirty) { - num_dw += ctx->atoms[i]->num_dw; - if (ctx->screen->b.trace_bo) { - num_dw += R600_TRACE_CS_DWORDS; - } + i = r600_next_dirty_atom(ctx, 0); + while (i < R600_NUM_ATOMS) { + num_dw += ctx->atoms[i]->num_dw; + if (ctx->screen->b.trace_bo) { + num_dw += R600_TRACE_CS_DWORDS; } + i = r600_next_dirty_atom(ctx, i + 1); } /* The upper-bound of how much space a draw command would take. */ diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 8b612698b13..5d10bb48157 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -85,6 +85,9 @@ #define R600_BIG_ENDIAN 0 #endif +#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8) +#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS) + struct r600_context; struct r600_bytecode; struct r600_shader_key; @@ -426,6 +429,8 @@ struct r600_context { /* State binding slots are here. */ struct r600_atom *atoms[R600_NUM_ATOMS]; + /* Dirty atom bitmask for fast tests */ + unsigned long dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN]; /* States for CS initialization. */ struct r600_command_buffer start_cs_cmd; /* invariant state mostly */ /** Compute specific registers initializations. The start_cs_cmd atom @@ -502,7 +507,18 @@ static inline void r600_set_atom_dirty(struct r600_context *rctx, struct r600_atom *atom, bool dirty) { + unsigned long mask; + unsigned int w; + atom->dirty = dirty; + + assert(atom->id != 0); + w = atom->id / R600_DIRTY_ATOM_WORD_BITS; + mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS); + if (dirty) + rctx->dirty_atoms[w] |= mask; + else + rctx->dirty_atoms[w] &= ~mask; } static inline void r600_mark_atom_dirty(struct r600_context *rctx, @@ -511,6 +527,35 @@ static inline void r600_mark_atom_dirty(struct r600_context *rctx, r600_set_atom_dirty(rctx, atom, true); } +static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx, + unsigned int id) +{ +#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ) + unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS; + unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS; + unsigned long bits, mask = (1ul << bit) - 1; + + for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) { + bits = rctx->dirty_atoms[w] & ~mask; + if (bits == 0) + continue; + return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits); + } + + return R600_NUM_ATOMS; +#else + for (; id < R600_NUM_ATOMS; id++) { + bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] & + (1ul << (id % R600_DIRTY_ATOM_WORD_BITS))); + assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty)); + if (dirty) + break; + } + + return id; +#endif +} + void r600_trace_emit(struct r600_context *rctx); static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index a4238a2fec9..8d0942ff898 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -54,6 +54,7 @@ void r600_add_atom(struct r600_context *rctx, assert(id < R600_NUM_ATOMS); assert(rctx->atoms[id] == NULL); rctx->atoms[id] = atom; + atom->id = id; atom->dirty = false; } @@ -1476,11 +1477,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE); r600_flush_emit(rctx); - for (i = 0; i < R600_NUM_ATOMS; i++) { - if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) { - continue; - } + i = r600_next_dirty_atom(rctx, 0); + while (i < R600_NUM_ATOMS) { r600_emit_atom(rctx, rctx->atoms[i]); + i = r600_next_dirty_atom(rctx, i + 1); } if (rctx->b.chip_class == CAYMAN) { diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 765ee3f70ba..768fe282981 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -315,6 +315,7 @@ struct r600_common_screen { struct r600_atom { void (*emit)(struct r600_common_context *ctx, struct r600_atom *state); unsigned num_dw; + unsigned short id; /* used by r600 only */ bool dirty; };