r600g: use an enabled list to track enabled blocks.
author Dave Airlie <airlied@redhat.com>
Thu, 2 Jun 2011 23:59:12 +0000 (09:59 +1000)
committer Dave Airlie <airlied@redhat.com>
Mon, 6 Jun 2011 23:33:53 +0000 (09:33 +1000)
At the end of flushing we were scanning over 450 blocks,
of which generally only about 50 were enabled. This reduces
the scan to just the list of enabled blocks.

Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/r600.h
src/gallium/winsys/r600/drm/evergreen_hw_context.c
src/gallium/winsys/r600/drm/r600_hw_context.c
src/gallium/winsys/r600/drm/r600_priv.h

index cc70600610ed40ca306fa43bded10a51dc4717d2..b1444bf94f4b56e9094aa246412db2b46a992cdc 100644 (file)
@@ -188,6 +188,7 @@ struct r600_block_reloc {
 
 struct r600_block {
        struct list_head        list;
+       struct list_head        enable_list;
        unsigned                status;
        unsigned                flags;
        unsigned                start_offset;
@@ -251,6 +252,7 @@ struct r600_context {
        unsigned                nblocks;
        struct r600_block       **blocks;
        struct list_head        dirty;
+       struct list_head        enable_list;
        unsigned                pm4_ndwords;
        unsigned                pm4_cdwords;
        unsigned                pm4_dirty_cdwords;
index 7072461dc59722a147908a162e30fd3f73662995..7a1be87f3dc1585370c7d52e211e64ecece63bce 100644 (file)
 static const struct r600_reg evergreen_config_reg_list[] = {
        {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0},
        {R_008A14_PA_CL_ENHANCE, 0, 0, 0},
-       {R_008C00_SQ_CONFIG, 0, 0, 0},
-       {R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0},
-       {R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0, 0, 0},
-       {R_008C0C_SQ_THREAD_RESOURCE_MGMT, 0, 0, 0},
-       {R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0, 0, 0},
-       {R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, 0, 0, 0},
-       {R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0, 0, 0},
-       {R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0, 0, 0},
-       {R_008C28_SQ_STACK_RESOURCE_MGMT_3, 0, 0, 0},
-       {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0},
-       {R_009100_SPI_CONFIG_CNTL, 0, 0, 0},
-       {R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0},
+       {R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008C08_SQ_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008C0C_SQ_THREAD_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008C18_SQ_THREAD_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008C20_SQ_STACK_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008C24_SQ_STACK_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008C28_SQ_STACK_RESOURCE_MGMT_3, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
 };
 
 
 static const struct r600_reg cayman_config_reg_list[] = {
        {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0},
        {R_008A14_PA_CL_ENHANCE, 0, 0, 0},
-       {R_008C00_SQ_CONFIG, 0, 0, 0},
-       {R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0},
-       {CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0, 0},
-       {CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0, 0},
-       {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0},
-       {R_009100_SPI_CONFIG_CNTL, 0, 0, 0},
-       {R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0},
+       {R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+       {R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
 };
 
 static const struct r600_reg evergreen_ctl_const_list[] = {
@@ -904,6 +904,10 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
        ctx->radeon = radeon;
        LIST_INITHEAD(&ctx->query_list);
 
+       /* init dirty list */
+       LIST_INITHEAD(&ctx->dirty);
+       LIST_INITHEAD(&ctx->enable_list);
+
        ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range));
        if (!ctx->range) {
                r = -ENOMEM;
@@ -1007,8 +1011,6 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
 
        LIST_INITHEAD(&ctx->fenced_bo);
 
-       /* init dirty list */
-       LIST_INITHEAD(&ctx->dirty);
        return 0;
 out_err:
        r600_context_fini(ctx);
@@ -1048,6 +1050,7 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context
        if (state == NULL) {
                block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
                LIST_DELINIT(&block->list);
+               LIST_DELINIT(&block->enable_list);
                return;
        }
        dirty = block->status & R600_BLOCK_STATUS_DIRTY;
@@ -1086,6 +1089,7 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c
        if (state == NULL) {
                block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
                LIST_DELINIT(&block->list);
+               LIST_DELINIT(&block->enable_list);
                return;
        }
        if (state->nregs <= 3) {
index 1fd6d34479836c2edac9caf3e55ca8c5a5da1ec6..c5551b2674c1b3abf14defe90ffee1ed68ed22c2 100644 (file)
@@ -98,11 +98,17 @@ static void r600_init_block(struct r600_context *ctx,
        block->nreg_dirty = n;
        block->flags = 0;
        LIST_INITHEAD(&block->list);
+       LIST_INITHEAD(&block->enable_list);
 
        for (j = 0; j < n; j++) {
                if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) {
                        block->flags |= REG_FLAG_DIRTY_ALWAYS;
                }
+               if (reg[i+j].flags & REG_FLAG_ENABLE_ALWAYS) {
+                       block->status |= R600_BLOCK_STATUS_ENABLED;
+                       LIST_ADDTAIL(&block->enable_list, &ctx->enable_list);
+               }
+
                if (reg[i+j].flags & REG_FLAG_NEED_BO) {
                        block->nbo++;
                        assert(block->nbo < R600_BLOCK_MAX_BO);
@@ -184,6 +190,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
                }
 
                r600_init_block(ctx, block, reg, i, n, opcode, offset_base);
+
        }
        return 0;
 }
@@ -768,6 +775,10 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
        ctx->radeon = radeon;
        LIST_INITHEAD(&ctx->query_list);
 
+       /* init dirty list */
+       LIST_INITHEAD(&ctx->dirty);
+       LIST_INITHEAD(&ctx->enable_list);
+
        ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range));
        if (!ctx->range) {
                r = -ENOMEM;
@@ -861,9 +872,6 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
 
        LIST_INITHEAD(&ctx->fenced_bo);
 
-       /* init dirty list */
-       LIST_INITHEAD(&ctx->dirty);
-
        ctx->max_db = 4;
 
        return 0;
@@ -985,17 +993,20 @@ void r600_context_reg(struct r600_context *ctx,
                r600_context_dirty_block(ctx, block, dirty, id);
 }
 
-void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block,
+void r600_context_dirty_block(struct r600_context *ctx,
+                             struct r600_block *block,
                              int dirty, int index)
 {
        if ((index + 1) > block->nreg_dirty)
                block->nreg_dirty = index + 1;
 
        if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) {
-
-               block->status |= R600_BLOCK_STATUS_ENABLED;
                block->status |= R600_BLOCK_STATUS_DIRTY;
                ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
+               if (!(block->status & R600_BLOCK_STATUS_ENABLED)) {
+                       block->status |= R600_BLOCK_STATUS_ENABLED;
+                       LIST_ADDTAIL(&block->enable_list, &ctx->enable_list);
+               }
                LIST_ADDTAIL(&block->list,&ctx->dirty);
        }
 }
@@ -1052,6 +1063,7 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
                r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
                r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
                LIST_DELINIT(&block->list);
+               LIST_DELINIT(&block->enable_list);
                return;
        }
 
@@ -1143,6 +1155,7 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx,
        if (state == NULL) {
                block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
                LIST_DELINIT(&block->list);
+               LIST_DELINIT(&block->enable_list);
                return;
        }
        dirty = block->status & R600_BLOCK_STATUS_DIRTY;
@@ -1180,6 +1193,7 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex
        if (state == NULL) {
                block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
                LIST_DELINIT(&block->list);
+               LIST_DELINIT(&block->enable_list);
                return;
        }
        if (state->nregs <= 3) {
@@ -1407,6 +1421,7 @@ void r600_context_flush(struct r600_context *ctx)
        uint64_t chunk_array[2];
        unsigned fence;
        int r;
+       struct r600_block *enable_block = NULL, *next_block;
 
        if (!ctx->pm4_cdwords)
                return;
@@ -1480,15 +1495,14 @@ void r600_context_flush(struct r600_context *ctx)
        /* set all valid group as dirty so they get reemited on
         * next draw command
         */
-       for (int i = 0; i < ctx->nblocks; i++) {
-               if (ctx->blocks[i]->status & R600_BLOCK_STATUS_ENABLED) {
-                       if(!(ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY)) {
-                               LIST_ADDTAIL(&ctx->blocks[i]->list,&ctx->dirty);
-                       }
-                       ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords;
-                       ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY;
-                       ctx->blocks[i]->nreg_dirty = ctx->blocks[i]->nreg;
+       LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) {
+               if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) {
+                       LIST_ADDTAIL(&enable_block->list,&ctx->dirty);
                }
+               ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords + 
+                       enable_block->pm4_flush_ndwords;
+               enable_block->status |= R600_BLOCK_STATUS_DIRTY;
+               enable_block->nreg_dirty = enable_block->nreg;
        }
 }
 
index 3e0fd6d7a717b0890e40c8b23d0eb017a326b48e..d9cb52409cdc96a42194b1d36bfd0899c4d201e9 100644 (file)
@@ -66,6 +66,7 @@ struct radeon {
 #define REG_FLAG_DIRTY_ALWAYS 2
 #define REG_FLAG_RV6XX_SBU 4
 #define REG_FLAG_NOT_R600 8
+#define REG_FLAG_ENABLE_ALWAYS 16
 
 struct r600_reg {
        unsigned                        offset;