r600g: modify block to only emit the first few dirty registers. (v2)
author Dave Airlie <airlied@redhat.com>
Mon, 18 Apr 2011 03:03:06 +0000 (13:03 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 19 Apr 2011 00:12:19 +0000 (10:12 +1000)
This gets me from 2200 to 1978 dwords for a gears frame.

This is because we have some 32-dword blocks in the SPI of which we only
modify the first few dwords.

v2: fix dirty reg count from Bas Nieuwenhuizen

Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/r600.h
src/gallium/winsys/r600/drm/evergreen_hw_context.c
src/gallium/winsys/r600/drm/r600_hw_context.c
src/gallium/winsys/r600/drm/r600_priv.h

index d605000203330636dc60b5c78c700a44fbf3756b..41666f2bff2fe29434fdbfd23929c1b02fd2d57e 100644 (file)
@@ -184,7 +184,8 @@ struct r600_block {
        unsigned                pm4_ndwords;
        unsigned                pm4_flush_ndwords;
        unsigned                nbo;
-       unsigned                nreg;
+       u16                     nreg;
+       u16                     nreg_dirty;
        u32                     *reg;
        u32                     pm4[R600_BLOCK_MAX_REG];
        unsigned                pm4_bo_index[R600_BLOCK_MAX_REG];
index b287ed54c29289f5936e80911ce6051d56fdffcb..670cbf7804ee97c0cc1f5e75598602d7ff77660b 100644 (file)
@@ -664,7 +664,7 @@ static inline void evergreen_context_pipe_state_set_resource(struct r600_context
                r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
                r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
        }
-       r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY);
+       r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY, 7);
 }
 
 void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
@@ -711,7 +711,7 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context
                }
        }
 
-       r600_context_dirty_block(ctx, block, dirty);
+       r600_context_dirty_block(ctx, block, dirty, 2);
 }
 
 static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset, unsigned id)
@@ -745,7 +745,7 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c
                }
        }
 
-       r600_context_dirty_block(ctx, block, dirty);
+       r600_context_dirty_block(ctx, block, dirty, 4);
 }
 
 void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
index 2d9e40303e08cd065e614187ef3d759c115a14db..52abf061ccee35f505d8733eaec9665036bbe838 100644 (file)
@@ -113,6 +113,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
                block->reg = &block->pm4[block->pm4_ndwords];
                block->pm4_ndwords += n;
                block->nreg = n;
+               block->nreg_dirty = n;
                block->flags = 0;
                LIST_INITHEAD(&block->list);
 
@@ -844,7 +845,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
        unsigned new_val;
        int dirty;
        for (int i = 0; i < state->nregs; i++) {
-               unsigned id;
+               unsigned id, reloc_id;
 
                range = &ctx->range[CTX_RANGE_ID(ctx, state->regs[i].offset)];
                block = range->blocks[CTX_BLOCK_ID(ctx, state->regs[i].offset)];
@@ -863,14 +864,14 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
                        dirty |= R600_BLOCK_STATUS_DIRTY;
                if (block->pm4_bo_index[id]) {
                        /* find relocation */
-                       id = block->pm4_bo_index[id];
-                       r600_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo);
+                       reloc_id = block->pm4_bo_index[id];
+                       r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, state->regs[i].bo);
                        state->regs[i].bo->fence = ctx->radeon->fence;
                        /* always force dirty for relocs for now */
                        dirty |= R600_BLOCK_STATUS_DIRTY;
                }
 
-               r600_context_dirty_block(ctx, block, dirty);
+               r600_context_dirty_block(ctx, block, dirty, id);
        }
 }
 
@@ -909,7 +910,7 @@ static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx
                state->regs[2].bo->fence = ctx->radeon->fence;
                state->regs[3].bo->fence = ctx->radeon->fence;
        }
-       r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY);
+       r600_context_dirty_block(ctx, block, R600_BLOCK_STATUS_DIRTY, 6);
 }
 
 void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
@@ -955,7 +956,7 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx,
                }
        }
 
-       r600_context_dirty_block(ctx, block, dirty);
+       r600_context_dirty_block(ctx, block, dirty, 2);
 }
 
 static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
@@ -983,7 +984,7 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex
                }
        }
 
-       r600_context_dirty_block(ctx, block, dirty);
+       r600_context_dirty_block(ctx, block, dirty, 3);
 }
 
 void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
@@ -1211,6 +1212,7 @@ void r600_context_flush(struct r600_context *ctx)
                        }
                        ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords;
                        ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY;
+                       ctx->blocks[i]->nreg_dirty = ctx->blocks[i]->nreg;
                }
        }
 }
index 534df11f1859dd9f9f493e5a7d0c6ca7d2cc7cbb..faf47e82ab8911bb91c48a936650d2576788c256 100644 (file)
@@ -36,6 +36,9 @@
 #include <os/os_thread.h>
 #include "r600.h"
 
+#define PKT_COUNT_C                     0xC000FFFF
+#define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
+
 struct r600_bomgr;
 struct r600_bo;
 
@@ -198,9 +201,13 @@ static void inline r600_context_reg(struct r600_context *ctx,
 }
 
 static inline void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block,
-                                           int dirty)
+                                           int dirty, int index)
 {
+       if (dirty && (index + 1) > block->nreg_dirty)
+               block->nreg_dirty = index + 1;
+
        if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) {
+
                block->status |= R600_BLOCK_STATUS_ENABLED;
                block->status |= R600_BLOCK_STATUS_DIRTY;
                ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
@@ -212,6 +219,10 @@ static inline void r600_context_block_emit_dirty(struct r600_context *ctx, struc
 {
        int id;
 
+       if (block->nreg_dirty == 0 && block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS)) {
+               goto out;
+       }
+
        for (int j = 0; j < block->nreg; j++) {
                if (block->pm4_bo_index[j]) {
                        /* find relocation */
@@ -227,7 +238,20 @@ static inline void r600_context_block_emit_dirty(struct r600_context *ctx, struc
        }
        memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4);
        ctx->pm4_cdwords += block->pm4_ndwords;
+
+       if (block->nreg_dirty != block->nreg && block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS)) {
+               int new_dwords = block->nreg_dirty;
+               uint32_t oldword, newword;
+               ctx->pm4_cdwords -= block->pm4_ndwords;
+               newword = oldword = ctx->pm4[ctx->pm4_cdwords];
+               newword &= PKT_COUNT_C;
+               newword |= PKT_COUNT_S(new_dwords);
+               ctx->pm4[ctx->pm4_cdwords] = newword;
+               ctx->pm4_cdwords += new_dwords + 2;
+       }
+out:
        block->status ^= R600_BLOCK_STATUS_DIRTY;
+       block->nreg_dirty = 0;
        LIST_DELINIT(&block->list);
 }