r600g: split resource emit path from main register emit path
author Dave Airlie <airlied@redhat.com>
Tue, 7 Jun 2011 03:21:02 +0000 (13:21 +1000)
committer Dave Airlie <airlied@redhat.com>
Wed, 8 Jun 2011 01:47:00 +0000 (11:47 +1000)
Since resources don't generally vary in size, this splits the resource
emit path out of the main register emit path. It also takes into account
that texture and vertex resources have a different number of relocs, and
avoids emitting the extra reloc for vertex resources.

Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/r600.h
src/gallium/winsys/r600/drm/evergreen_hw_context.c
src/gallium/winsys/r600/drm/r600_hw_context.c
src/gallium/winsys/r600/drm/r600_priv.h

index 0d37376594bb86401457c116207e7ec8357dbdd2..bf7138d9e4e845a700cf80e05875f1df8b20e575 100644 (file)
@@ -162,6 +162,9 @@ struct r600_pipe_resource_state {
 
 #define R600_BLOCK_STATUS_ENABLED      (1 << 0)
 #define R600_BLOCK_STATUS_DIRTY                (1 << 1)
+#define R600_BLOCK_STATUS_RESOURCE_DIRTY       (1 << 2)
+
+#define R600_BLOCK_STATUS_RESOURCE_VERTEX      (1 << 3)
 
 struct r600_block_reloc {
        struct r600_bo          *bo;
@@ -236,6 +239,7 @@ struct r600_context {
        unsigned                nblocks;
        struct r600_block       **blocks;
        struct list_head        dirty;
+       struct list_head        resource_dirty;
        struct list_head        enable_list;
        unsigned                pm4_ndwords;
        unsigned                pm4_cdwords;
index 11514156cd0686d6378c211fdf6f19f852cbdcb0..e9f28bd79a91170bd0b141fda475e2628c928ce5 100644 (file)
@@ -910,6 +910,7 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
 
        /* init dirty list */
        LIST_INITHEAD(&ctx->dirty);
+       LIST_INITHEAD(&ctx->resource_dirty);
        LIST_INITHEAD(&ctx->enable_list);
 
        ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range));
@@ -1188,6 +1189,10 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
                r600_context_block_emit_dirty(ctx, dirty_block);
        }
 
+       LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->resource_dirty,list) {
+               r600_context_block_resource_emit_dirty(ctx, dirty_block);
+       }
+
        /* draw packet */
        pm4 = &ctx->pm4[ctx->pm4_cdwords];
        pm4[0] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing);
index 7db5a03bce4774ac28f63be2479844b81c3235d6..7386c002659c596e9f10b50e946f2626f03c8db7 100644 (file)
@@ -88,7 +88,13 @@ static void r600_init_block(struct r600_context *ctx,
        int j, n = nreg;
 
        /* initialize block */
-       block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */
+       if (opcode == PKT3_SET_RESOURCE) {
+               block->flags = BLOCK_FLAG_RESOURCE;
+               block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; /* dirty all blocks at start */
+       } else {
+               block->flags = 0;
+               block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */
+       }
        block->start_offset = reg[i].offset;
        block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0);
        block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2;
@@ -96,7 +102,6 @@ static void r600_init_block(struct r600_context *ctx,
        block->pm4_ndwords += n;
        block->nreg = n;
        block->nreg_dirty = n;
-       block->flags = 0;
        LIST_INITHEAD(&block->list);
        LIST_INITHEAD(&block->enable_list);
 
@@ -777,6 +782,7 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
 
        /* init dirty list */
        LIST_INITHEAD(&ctx->dirty);
+       LIST_INITHEAD(&ctx->resource_dirty);
        LIST_INITHEAD(&ctx->enable_list);
 
        ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range));
@@ -1048,6 +1054,23 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
        }
 }
 
+static void r600_context_dirty_resource_block(struct r600_context *ctx,
+                                             struct r600_block *block,
+                                             int dirty, int index)
+{
+       block->nreg_dirty = index + 1;
+
+       if ((dirty != (block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) {
+               block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
+               ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
+               if (!(block->status & R600_BLOCK_STATUS_ENABLED)) {
+                       block->status |= R600_BLOCK_STATUS_ENABLED;
+                       LIST_ADDTAIL(&block->enable_list, &ctx->enable_list);
+               }
+               LIST_ADDTAIL(&block->list,&ctx->resource_dirty);
+       }
+}
+
 void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, struct r600_block *block)
 {
        int i;
@@ -1056,38 +1079,37 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
        boolean is_vertex;
 
        if (state == NULL) {
-               block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
+               block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY);
                if (block->reloc[1].bo)
                        block->reloc[1].bo->bo->binding &= ~BO_BOUND_TEXTURE;
 
                r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
-               r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
+               r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL);
                LIST_DELINIT(&block->list);
                LIST_DELINIT(&block->enable_list);
                return;
        }
 
        is_vertex = ((state->val[num_regs-1] & 0xc0000000) == 0xc0000000);
-       dirty = block->status & R600_BLOCK_STATUS_DIRTY;
+       dirty = block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY;
 
        if (memcmp(block->reg, state->val, num_regs*4)) {
                memcpy(block->reg, state->val, num_regs * 4);
-               dirty |= R600_BLOCK_STATUS_DIRTY;
+               dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
        }
 
        /* if no BOs on block, force dirty */
        if (!block->reloc[1].bo || !block->reloc[2].bo)
-               dirty |= R600_BLOCK_STATUS_DIRTY;
+               dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
 
        if (!dirty) {
                if (is_vertex) {
-                       if ((block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) ||
-                           (block->reloc[2].bo->bo->handle != state->bo[0]->bo->handle))
-                               dirty |= R600_BLOCK_STATUS_DIRTY;
+                       if (block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle)
+                               dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
                } else {
                        if ((block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) ||
                            (block->reloc[2].bo->bo->handle != state->bo[1]->bo->handle))
-                               dirty |= R600_BLOCK_STATUS_DIRTY;
+                               dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
                }
        }
        if (!dirty) {
@@ -1103,7 +1125,7 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
                         * we have single case btw VERTEX & TEXTURE resource
                         */
                        r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]);
-                       r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[0]);
+                       r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL);
                        state->bo[0]->fence = ctx->radeon->fence;
                } else {
                        /* TEXTURE RESOURCE */
@@ -1114,8 +1136,14 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
                        state->bo[0]->bo->binding |= BO_BOUND_TEXTURE;
                }
        }
-       if (dirty)
-               r600_context_dirty_block(ctx, block, dirty, num_regs - 1);
+       if (dirty) {
+               if (is_vertex)
+                       block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX;
+               else
+                       block->status &= ~R600_BLOCK_STATUS_RESOURCE_VERTEX;
+       
+               r600_context_dirty_resource_block(ctx, block, dirty, num_regs - 1);
+       }
 }
 
 void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid)
@@ -1305,6 +1333,44 @@ out:
        LIST_DELINIT(&block->list);
 }
 
+void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block)
+{
+       int id;
+       int cp_dwords = block->pm4_ndwords, start_dword = 0;
+       int new_dwords = 0;
+       int nbo = block->nbo;
+
+       ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH;
+
+       if (block->status & R600_BLOCK_STATUS_RESOURCE_VERTEX) {
+               nbo = 1;
+               cp_dwords -= 2; /* don't copy the second NOP */
+       }
+
+       for (int j = 0; j < nbo; j++) {
+               if (block->pm4_bo_index[j]) {
+                       /* find relocation */
+                       id = block->pm4_bo_index[j];
+                       r600_context_bo_reloc(ctx,
+                                             &block->pm4[block->reloc[id].bo_pm4_index],
+                                             block->reloc[id].bo);
+                       r600_context_bo_flush(ctx,
+                                             block->reloc[id].flush_flags,
+                                             block->reloc[id].flush_mask,
+                                             block->reloc[id].bo);
+               }
+       }
+       ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
+
+       memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, cp_dwords * 4);
+       ctx->pm4_cdwords += cp_dwords;
+
+out:
+       block->status ^= R600_BLOCK_STATUS_RESOURCE_DIRTY;
+       block->nreg_dirty = 0;
+       LIST_DELINIT(&block->list);
+}
+
 void r600_context_flush_dest_caches(struct r600_context *ctx)
 {
        struct r600_bo *cb[8];
@@ -1389,6 +1455,10 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
                r600_context_block_emit_dirty(ctx, dirty_block);
        }
 
+       LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->resource_dirty, list) {
+               r600_context_block_resource_emit_dirty(ctx, dirty_block);
+       }
+
        /* draw packet */
        pm4 = &ctx->pm4[ctx->pm4_cdwords];
 
@@ -1500,12 +1570,19 @@ void r600_context_flush(struct r600_context *ctx)
         * next draw command
         */
        LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) {
-               if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) {
-                       LIST_ADDTAIL(&enable_block->list,&ctx->dirty);
+               if (!(enable_block->flags & BLOCK_FLAG_RESOURCE)) {
+                       if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) {
+                               LIST_ADDTAIL(&enable_block->list,&ctx->dirty);
+                               enable_block->status |= R600_BLOCK_STATUS_DIRTY;
+                       }
+               } else {
+                       if(!(enable_block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) {
+                               LIST_ADDTAIL(&enable_block->list,&ctx->resource_dirty);
+                               enable_block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
+                       }
                }
                ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords + 
                        enable_block->pm4_flush_ndwords;
-               enable_block->status |= R600_BLOCK_STATUS_DIRTY;
                enable_block->nreg_dirty = enable_block->nreg;
        }
 }
index da31a42b2a058cbb63c97139805c1a03409fcfea..16deab60c96ae672451a708d21da9abaf3c2671c 100644 (file)
@@ -62,11 +62,13 @@ struct radeon {
        pipe_mutex bo_handles_mutex;
 };
 
+/* these flags are used in register flags and added into block flags */
 #define REG_FLAG_NEED_BO 1
 #define REG_FLAG_DIRTY_ALWAYS 2
 #define REG_FLAG_RV6XX_SBU 4
 #define REG_FLAG_NOT_R600 8
 #define REG_FLAG_ENABLE_ALWAYS 16
+#define BLOCK_FLAG_RESOURCE 32
 
 struct r600_reg {
        unsigned                        offset;
@@ -162,6 +164,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
                           unsigned opcode, unsigned offset_base);
 void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, struct r600_block *block);
 void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block);
+void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block);
 void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block,
                              int dirty, int index);
 int r600_setup_block_table(struct r600_context *ctx);