X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fwinsys%2Fr600%2Fdrm%2Fr600_hw_context.c;h=5d415ae6348ad72959a80b59d21f99aa1e709420;hb=4682e706012fe26627a2f827db01b5068cc62814;hp=e60bf75c5b1675130278435bdc569c0816f2a986;hpb=d79a4a612bae66581caf21a74bd745ec51a18e80;p=mesa.git

diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index e60bf75c5b1..5d415ae6348 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -23,47 +23,192 @@
  * Authors:
  *      Jerome Glisse
  */
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include "xf86drm.h"
-#include "radeon_drm.h"
 #include "r600_priv.h"
-#include "bof.h"
 #include "r600d.h"
+#include "util/u_memory.h"
+#include 

 #define GROUP_FORCE_NEW_BLOCK	0

-static void INLINE r600_context_update_fenced_list(struct r600_context *ctx)
+/* Get backends mask */
+void r600_get_backend_mask(struct r600_context *ctx)
 {
-	for (int i = 0; i < ctx->creloc; i++) {
-		if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist))
-			LIST_DELINIT(&ctx->bo[i]->fencedlist);
-		LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo);
-		ctx->bo[i]->fence = ctx->radeon->fence;
-		ctx->bo[i]->ctx = ctx;
+	struct r600_bo * buffer;
+	u32 * results;
+	unsigned num_backends = r600_get_num_backends(ctx->radeon);
+	unsigned i, mask = 0;
+
+	/* if backend_map query is supported by the kernel */
+	if (ctx->radeon->info.r600_backend_map_valid) {
+		unsigned num_tile_pipes = r600_get_num_tile_pipes(ctx->radeon);
+		unsigned backend_map = r600_get_backend_map(ctx->radeon);
+		unsigned item_width, item_mask;
+
+		if (ctx->radeon->chip_class >= EVERGREEN) {
+			item_width = 4;
+			item_mask = 0x7;
+		} else {
+			item_width = 2;
+			item_mask = 0x3;
+		}
+
+		while(num_tile_pipes--) {
+			i = backend_map & item_mask;
+			mask |= (1<<i);
+			backend_map >>= item_width;
+		}
+		if (mask != 0) {
+			ctx->backend_mask = mask;
+			return;
+		}
+	}
+
+	/* otherwise backup path for older kernels */
+
+	/* create buffer for event data */
+	buffer = r600_bo(ctx->radeon, ctx->max_db*16, 1, 0,
+			 PIPE_USAGE_STAGING);
+	if (!buffer)
+		goto err;
+
+	/* initialize buffer with zeroes */
+	results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_WRITE);
+	if (results) {
+		memset(results, 0, ctx->max_db * 4 * 4);
+		r600_bo_unmap(ctx->radeon, buffer);
+
+		/* emit EVENT_WRITE for ZPASS_DONE */
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE);
+
+		/* execute */
+		r600_context_flush(ctx, 0);
+
+		/* analyze results */
+		results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_READ);
+		if (results) {
+			for(i = 0; i < ctx->max_db; i++) {
+				/* at least highest bit will be set if backend is used */
+				if (results[i*4 + 1])
+					mask |= (1<<i);
+			}
+			r600_bo_unmap(ctx->radeon, buffer);
+		}
+	}
+
+	r600_bo_reference(&buffer, NULL);
+
+	if (mask != 0) {
+		ctx->backend_mask = mask;
+		return;
+	}
+
+err:
+	/* fallback to old method - set num_backends lower bits to 1 */
+	ctx->backend_mask = (~((u32)0))>>(32-num_backends);
+	return;
 }

-static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsigned fence)
+static inline void r600_context_ps_partial_flush(struct r600_context *ctx)
 {
-	struct radeon_bo *bo = NULL;
- struct radeon_bo *tmp; + if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) + return; - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { - if (bo->fence <= *ctx->radeon->cfence) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - } else { - bo->fence = fence; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); + + ctx->flags &= ~R600_CONTEXT_DRAW_PENDING; +} + +void r600_init_cs(struct r600_context *ctx) +{ + /* R6xx requires this packet at the start of each command buffer */ + if (ctx->radeon->family < CHIP_RV770) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_START_3D_CMDBUF, 0, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; + } + /* All asics require this one */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_CONTEXT_CONTROL, 1, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0x80000000; + ctx->pm4[ctx->pm4_cdwords++] = 0x80000000; + + ctx->init_dwords = ctx->pm4_cdwords; +} + +static void r600_init_block(struct r600_context *ctx, + struct r600_block *block, + const struct r600_reg *reg, int index, int nreg, + unsigned opcode, unsigned offset_base) +{ + int i = index; + int j, n = nreg; + + /* initialize block */ + if (opcode == PKT3_SET_RESOURCE) { + block->flags = BLOCK_FLAG_RESOURCE; + block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; /* dirty all blocks at start */ + } else { + block->flags = 0; + block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */ + } + block->start_offset = reg[i].offset; + block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0); + block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2; + block->reg = &block->pm4[block->pm4_ndwords]; + block->pm4_ndwords += n; + block->nreg = n; + block->nreg_dirty = n; + LIST_INITHEAD(&block->list); + LIST_INITHEAD(&block->enable_list); + + for (j = 0; j < n; j++) { + if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) { + block->flags |= REG_FLAG_DIRTY_ALWAYS; + } + if (reg[i+j].flags & REG_FLAG_ENABLE_ALWAYS) { + if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); + LIST_ADDTAIL(&block->list,&ctx->dirty); + } + } + if (reg[i+j].flags & REG_FLAG_FLUSH_CHANGE) { + block->flags |= REG_FLAG_FLUSH_CHANGE; + } + + if (reg[i+j].flags & REG_FLAG_NEED_BO) { + block->nbo++; + assert(block->nbo < R600_BLOCK_MAX_BO); + block->pm4_bo_index[j] = block->nbo; + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); + block->pm4[block->pm4_ndwords++] = 0x00000000; + if (reg[i+j].flags & REG_FLAG_RV6XX_SBU) { + block->reloc[block->nbo].flush_flags = 0; + block->reloc[block->nbo].flush_mask = 0; + } else { + block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; + block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; + } + block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1; + } + if ((ctx->radeon->family > CHIP_R600) && + (ctx->radeon->family < CHIP_RV770) && reg[i+j].flags & REG_FLAG_RV6XX_SBU) { + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); + block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags; } } + for (j = 0; j < n; j++) { + if (reg[i+j].flush_flags) { + block->pm4_flush_ndwords += 7; + } + } + /* check that we stay in limit */ + assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); } int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, @@ -74,14 +219,18 @@ int r600_context_add_block(struct r600_context *ctx, 
const struct r600_reg *reg, int offset; for (unsigned i = 0, n = 0; i < nreg; i += n) { - u32 j; - /* ignore new block balise */ if (reg[i].offset == GROUP_FORCE_NEW_BLOCK) { n = 1; continue; } + /* ignore regs not on R600 on R600 */ + if ((reg[i].flags & REG_FLAG_NOT_R600) && ctx->radeon->family == CHIP_R600) { + n = 1; + continue; + } + /* register that need relocation are in their own group */ /* find number of consecutive registers */ n = 0; @@ -102,54 +251,18 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, } ctx->nblocks++; for (int j = 0; j < n; j++) { - range = &ctx->range[CTX_RANGE_ID(ctx, reg[i + j].offset)]; - range->blocks[CTX_BLOCK_ID(ctx, reg[i + j].offset)] = block; + range = &ctx->range[CTX_RANGE_ID(reg[i + j].offset)]; + /* create block table if it doesn't exist */ + if (!range->blocks) + range->blocks = calloc(1 << HASH_SHIFT, sizeof(void *)); + if (!range->blocks) + return -1; + + range->blocks[CTX_BLOCK_ID(reg[i + j].offset)] = block; } - /* initialize block */ - block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */ - block->start_offset = reg[i].offset; - block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0); - block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2; - block->reg = &block->pm4[block->pm4_ndwords]; - block->pm4_ndwords += n; - block->nreg = n; - block->nreg_dirty = n; - block->flags = 0; - LIST_INITHEAD(&block->list); + r600_init_block(ctx, block, reg, i, n, opcode, offset_base); - for (j = 0; j < n; j++) { - if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) { - block->flags |= REG_FLAG_DIRTY_ALWAYS; - } - if (reg[i+j].flags & REG_FLAG_NEED_BO) { - block->nbo++; - assert(block->nbo < R600_BLOCK_MAX_BO); - block->pm4_bo_index[j] = block->nbo; - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); - block->pm4[block->pm4_ndwords++] = 0x00000000; - if (reg[i+j].flags & REG_FLAG_RV6XX_SBU) { - block->reloc[block->nbo].flush_flags = 0; - block->reloc[block->nbo].flush_mask = 0; - } else { - block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; - block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; - } - block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1; - } - if ((ctx->radeon->family > CHIP_R600) && - (ctx->radeon->family < CHIP_RV770) && reg[i+j].flags & REG_FLAG_RV6XX_SBU) { - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); - block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags; - } - } - for (j = 0; j < n; j++) { - if (reg[i+j].flush_flags) { - block->pm4_flush_ndwords += 7; - } - } - /* check that we stay in limit */ - assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); } return 0; } @@ -157,17 +270,17 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, /* R600/R700 configuration */ static const struct r600_reg r600_config_reg_list[] = { {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, - {R_008C00_SQ_CONFIG, 0, 0, 0}, - {R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0}, - {R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008C0C_SQ_THREAD_RESOURCE_MGMT, 0, 0, 0}, - {R_008C10_SQ_STACK_RESOURCE_MGMT_1, 0, 0, 0}, - {R_008C14_SQ_STACK_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0}, - {R_009508_TA_CNTL_AUX, 0, 0, 0}, - {R_009714_VC_ENHANCE, 0, 0, 0}, - {R_009830_DB_DEBUG, 0, 0, 0}, - {R_009838_DB_WATERMARKS, 0, 0, 0}, + {R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + 
{R_008C08_SQ_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_008C0C_SQ_THREAD_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_008C10_SQ_STACK_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_008C14_SQ_STACK_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_009508_TA_CNTL_AUX, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_009714_VC_ENHANCE, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_009830_DB_DEBUG, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_009838_DB_WATERMARKS, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, }; static const struct r600_reg r600_ctl_const_list[] = { @@ -315,14 +428,14 @@ static const struct r600_reg r600_context_reg_list[] = { {R_0286DC_SPI_FOG_CNTL, 0, 0, 0}, {R_0286E0_SPI_FOG_FUNC_SCALE, 0, 0, 0}, {R_0286E4_SPI_FOG_FUNC_BIAS, 0, 0, 0}, - {R_028780_CB_BLEND0_CONTROL, 0, 0, 0}, - {R_028784_CB_BLEND1_CONTROL, 0, 0, 0}, - {R_028788_CB_BLEND2_CONTROL, 0, 0, 0}, - {R_02878C_CB_BLEND3_CONTROL, 0, 0, 0}, - {R_028790_CB_BLEND4_CONTROL, 0, 0, 0}, - {R_028794_CB_BLEND5_CONTROL, 0, 0, 0}, - {R_028798_CB_BLEND6_CONTROL, 0, 0, 0}, - {R_02879C_CB_BLEND7_CONTROL, 0, 0, 0}, + {R_028780_CB_BLEND0_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_028784_CB_BLEND1_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_028788_CB_BLEND2_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_02878C_CB_BLEND3_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_028790_CB_BLEND4_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_028794_CB_BLEND5_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_028798_CB_BLEND6_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_02879C_CB_BLEND7_CONTROL, REG_FLAG_NOT_R600, 0, 0}, {R_0287A0_CB_SHADER_CONTROL, 0, 0, 0}, {R_028800_DB_DEPTH_CONTROL, 0, 0, 0}, {R_028804_CB_BLEND_CONTROL, 0, 0, 0}, @@ -527,23 +640,44 @@ static const struct r600_reg r600_context_reg_list[] = { }; /* SHADER RESOURCE R600/R700 */ -static int r600_state_resource_init(struct r600_context *ctx, u32 offset) +int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base) +{ + int i; + struct r600_block *block; + range->blocks = calloc(nblocks, sizeof(struct r600_block *)); + if (range->blocks == NULL) + return -ENOMEM; + + reg[0].offset += offset; + for (i = 0; i < nblocks; i++) { + block = calloc(1, sizeof(struct r600_block)); + if (block == NULL) { + return -ENOMEM; + } + ctx->nblocks++; + range->blocks[i] = block; + r600_init_block(ctx, block, reg, 0, nreg, PKT3_SET_RESOURCE, offset_base); + + reg[0].offset += stride; + } + return 0; +} + + +static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride) { struct r600_reg r600_shader_resource[] = { - {R_038000_RESOURCE0_WORD0, 0, 0, 0}, - {R_038004_RESOURCE0_WORD1, 0, 0, 0}, - {R_038008_RESOURCE0_WORD2, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, - {R_03800C_RESOURCE0_WORD3, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_038000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_038004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_038008_RESOURCE0_WORD2, 0, 0, 0}, + 
{R_03800C_RESOURCE0_WORD3, 0, 0, 0}, {R_038010_RESOURCE0_WORD4, 0, 0, 0}, {R_038014_RESOURCE0_WORD5, 0, 0, 0}, {R_038018_RESOURCE0_WORD6, 0, 0, 0}, }; unsigned nreg = Elements(r600_shader_resource); - for (int i = 0; i < nreg; i++) { - r600_shader_resource[i].offset += offset; - } - return r600_context_add_block(ctx, r600_shader_resource, nreg, PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET); + return r600_resource_init(ctx, range, offset, nblocks, stride, r600_shader_resource, nreg, R600_RESOURCE_OFFSET); } /* SHADER SAMPLER R600/R700 */ @@ -594,15 +728,20 @@ static int r600_loop_const_init(struct r600_context *ctx, u32 offset) return r600_context_add_block(ctx, r600_loop_consts, nreg, PKT3_SET_LOOP_CONST, R600_LOOP_CONST_OFFSET); } -static void r600_context_clear_fenced_bo(struct r600_context *ctx) +static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks) { - struct radeon_bo *bo, *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - bo->ctx = NULL; + struct r600_block *block; + int i; + for (i = 0; i < nblocks; i++) { + block = range->blocks[i]; + if (block) { + for (int k = 1; k <= block->nbo; k++) + r600_bo_reference(&block->reloc[k].bo, NULL); + free(block); + } } + free(range->blocks); + } /* initialize */ @@ -612,54 +751,101 @@ void r600_context_fini(struct r600_context *ctx) struct r600_range *range; for (int i = 0; i < NUM_RANGES; i++) { + if (!ctx->range[i].blocks) + continue; for (int j = 0; j < (1 << HASH_SHIFT); j++) { block = ctx->range[i].blocks[j]; if (block) { for (int k = 0, offset = block->start_offset; k < block->nreg; k++, offset += 4) { - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - range->blocks[CTX_BLOCK_ID(ctx, offset)] = NULL; + range = &ctx->range[CTX_RANGE_ID(offset)]; + range->blocks[CTX_BLOCK_ID(offset)] = NULL; } for (int k = 1; k <= block->nbo; k++) { - r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); + r600_bo_reference(&block->reloc[k].bo, NULL); } free(block); } } free(ctx->range[i].blocks); } + r600_free_resource_range(ctx, &ctx->ps_resources, ctx->num_ps_resources); + r600_free_resource_range(ctx, &ctx->vs_resources, ctx->num_vs_resources); + r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources); free(ctx->range); free(ctx->blocks); - free(ctx->reloc); free(ctx->bo); - free(ctx->pm4); + ctx->radeon->ws->cs_destroy(ctx->cs); - r600_context_clear_fenced_bo(ctx); memset(ctx, 0, sizeof(struct r600_context)); } +static void r600_add_resource_block(struct r600_context *ctx, struct r600_range *range, int num_blocks, int *index) +{ + int c = *index; + for (int j = 0; j < num_blocks; j++) { + if (!range->blocks[j]) + continue; + + ctx->blocks[c++] = range->blocks[j]; + } + *index = c; +} + +int r600_setup_block_table(struct r600_context *ctx) +{ + /* setup block table */ + int c = 0; + ctx->blocks = calloc(ctx->nblocks, sizeof(void*)); + if (!ctx->blocks) + return -ENOMEM; + for (int i = 0; i < NUM_RANGES; i++) { + if (!ctx->range[i].blocks) + continue; + for (int j = 0, add; j < (1 << HASH_SHIFT); j++) { + if (!ctx->range[i].blocks[j]) + continue; + + add = 1; + for (int k = 0; k < c; k++) { + if (ctx->blocks[k] == ctx->range[i].blocks[j]) { + add = 0; + break; + } + } + if (add) { + assert(c < ctx->nblocks); + ctx->blocks[c++] = ctx->range[i].blocks[j]; + j += (ctx->range[i].blocks[j]->nreg) - 1; + } + } + } + + r600_add_resource_block(ctx, &ctx->ps_resources, ctx->num_ps_resources, &c); + 
r600_add_resource_block(ctx, &ctx->vs_resources, ctx->num_vs_resources, &c); + r600_add_resource_block(ctx, &ctx->fs_resources, ctx->num_fs_resources, &c); + return 0; +} + int r600_context_init(struct r600_context *ctx, struct radeon *radeon) { int r; memset(ctx, 0, sizeof(struct r600_context)); ctx->radeon = radeon; + LIST_INITHEAD(&ctx->query_list); + /* init dirty list */ + LIST_INITHEAD(&ctx->dirty); + LIST_INITHEAD(&ctx->resource_dirty); + LIST_INITHEAD(&ctx->enable_list); + ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range)); if (!ctx->range) { r = -ENOMEM; goto out_err; } - /* initialize hash */ - for (int i = 0; i < NUM_RANGES; i++) { - ctx->range[i].blocks = calloc(1 << HASH_SHIFT, sizeof(void*)); - if (ctx->range[i].blocks == NULL) { - r = -ENOMEM; - goto out_err; - } - } - /* add blocks */ r = r600_context_add_block(ctx, r600_config_reg_list, Elements(r600_config_reg_list), PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET); @@ -699,79 +885,46 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; } - /* PS RESOURCE */ - for (int j = 0, offset = 0; j < 160; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* VS RESOURCE */ - for (int j = 0, offset = 0x1180; j < 160; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* FS RESOURCE */ - for (int j = 0, offset = 0x2300; j < 16; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } + + ctx->num_ps_resources = 160; + ctx->num_vs_resources = 160; + ctx->num_fs_resources = 16; + r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 160, 0x1c); + if (r) + goto out_err; + r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1180, 160, 0x1c); + if (r) + goto out_err; + r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x2300, 16, 0x1c); + if (r) + goto out_err; /* PS loop const */ r600_loop_const_init(ctx, 0); /* VS loop const */ r600_loop_const_init(ctx, 32); - /* setup block table */ - ctx->blocks = calloc(ctx->nblocks, sizeof(void*)); - for (int i = 0, c = 0; i < NUM_RANGES; i++) { - for (int j = 0, add; j < (1 << HASH_SHIFT); j++) { - if (ctx->range[i].blocks[j]) { - add = 1; - for (int k = 0; k < c; k++) { - if (ctx->blocks[k] == ctx->range[i].blocks[j]) { - add = 0; - break; - } - } - if (add) { - assert(c < ctx->nblocks); - ctx->blocks[c++] = ctx->range[i].blocks[j]; - j += (ctx->range[i].blocks[j]->nreg) - 1; - } - } - } - } + r = r600_setup_block_table(ctx); + if (r) + goto out_err; + + ctx->cs = radeon->ws->cs_create(radeon->ws); /* allocate cs variables */ - ctx->nreloc = RADEON_CTX_MAX_PM4; - ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); - if (ctx->reloc == NULL) { - r = -ENOMEM; - goto out_err; - } - ctx->bo = calloc(ctx->nreloc, sizeof(void *)); + ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *)); if (ctx->bo == NULL) { r = -ENOMEM; goto out_err; } - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; - ctx->pm4 = calloc(ctx->pm4_ndwords, 4); - if (ctx->pm4 == NULL) { - r = -ENOMEM; - goto out_err; - } + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS; + ctx->pm4 = ctx->cs->buf; + + r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; - - LIST_INITHEAD(&ctx->fenced_bo); - - /* init dirty list */ - LIST_INITHEAD(&ctx->dirty); - ctx->max_db = 4; + r600_get_backend_mask(ctx); return 0; out_err: r600_context_fini(ctx); @@ -785,7 +938,7 @@ void r600_context_flush_all(struct r600_context *ctx, 
unsigned flush_flags) if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); @@ -796,11 +949,8 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) } void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, - unsigned flush_mask, struct r600_bo *rbo) + unsigned flush_mask, struct r600_bo *bo) { - struct radeon_bo *bo; - - bo = rbo->bo; /* if bo has already been flushed */ if (!(~bo->last_flush & flush_flags)) { bo->last_flush &= flush_mask; @@ -812,14 +962,17 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, G_0085F0_DB_ACTION_ENA(flush_flags))) { if (ctx->flags & R600_CONTEXT_CHECK_EVENT_FLUSH) { /* the rv670 seems to fail fbo-generatemipmap unless we flush the CB1 dest base ena */ - if ((ctx->radeon->family == CHIP_RV670) || - (ctx->radeon->family == CHIP_RS780) || - (ctx->radeon->family == CHIP_RS880)) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = S_0085F0_CB1_DEST_BASE_ENA(1); /* CP_COHER_CNTL */ - ctx->pm4[ctx->pm4_cdwords++] = 0xffffffff; /* CP_COHER_SIZE */ - ctx->pm4[ctx->pm4_cdwords++] = 0; /* CP_COHER_BASE */ - ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; /* POLL_INTERVAL */ + if ((bo->binding & BO_BOUND_TEXTURE) && + (flush_flags & S_0085F0_CB_ACTION_ENA(1))) { + if ((ctx->radeon->family == CHIP_RV670) || + (ctx->radeon->family == CHIP_RS780) || + (ctx->radeon->family == CHIP_RS880)) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); + ctx->pm4[ctx->pm4_cdwords++] = S_0085F0_CB1_DEST_BASE_ENA(1); /* CP_COHER_CNTL */ + ctx->pm4[ctx->pm4_cdwords++] = 0xffffffff; /* CP_COHER_SIZE */ + ctx->pm4[ctx->pm4_cdwords++] = 0; /* CP_COHER_BASE */ + ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; /* POLL_INTERVAL */ + } } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing); @@ -829,38 +982,15 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = flush_flags; - ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8; + ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->size + 255) >> 8; ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id; + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE); } bo->last_flush = (bo->last_flush | flush_flags) & flush_mask; } -void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo) -{ - struct radeon_bo *bo; - - bo = rbo->bo; - assert(bo != NULL); - if (bo->reloc) { - *pm4 = bo->reloc_id; - return; - } - bo->reloc = &ctx->reloc[ctx->creloc]; - bo->reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4; - ctx->reloc[ctx->creloc].handle = bo->handle; - ctx->reloc[ctx->creloc].read_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); - ctx->reloc[ctx->creloc].write_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); - ctx->reloc[ctx->creloc].flags = 0; - radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo); - rbo->fence = ctx->radeon->fence; - ctx->creloc++; - /* set PKT3 to 
point to proper reloc */ - *pm4 = bo->reloc_id; -} - void r600_context_reg(struct r600_context *ctx, unsigned offset, unsigned value, unsigned mask) @@ -871,8 +1001,8 @@ void r600_context_reg(struct r600_context *ctx, unsigned new_val; int dirty; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + range = &ctx->range[CTX_RANGE_ID(offset)]; + block = range->blocks[CTX_BLOCK_ID(offset)]; id = (offset - block->start_offset) >> 2; dirty = block->status & R600_BLOCK_STATUS_DIRTY; @@ -884,42 +1014,49 @@ void r600_context_reg(struct r600_context *ctx, dirty |= R600_BLOCK_STATUS_DIRTY; block->reg[id] = new_val; } - r600_context_dirty_block(ctx, block, dirty, id); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, id); } -void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block, +void r600_context_dirty_block(struct r600_context *ctx, + struct r600_block *block, int dirty, int index) { - if (dirty && (index + 1) > block->nreg_dirty) + if ((index + 1) > block->nreg_dirty) block->nreg_dirty = index + 1; if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { - - block->status |= R600_BLOCK_STATUS_ENABLED; block->status |= R600_BLOCK_STATUS_DIRTY; ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); + } LIST_ADDTAIL(&block->list,&ctx->dirty); + + if (block->flags & REG_FLAG_FLUSH_CHANGE) { + r600_context_ps_partial_flush(ctx); + } } } void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state) { - struct r600_range *range; struct r600_block *block; unsigned new_val; int dirty; for (int i = 0; i < state->nregs; i++) { unsigned id, reloc_id; + struct r600_pipe_reg *reg = &state->regs[i]; - range = &ctx->range[CTX_RANGE_ID(ctx, state->regs[i].offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, state->regs[i].offset)]; - id = (state->regs[i].offset - block->start_offset) >> 2; + block = reg->block; + id = reg->id; dirty = block->status & R600_BLOCK_STATUS_DIRTY; new_val = block->reg[id]; - new_val &= ~state->regs[i].mask; - new_val |= state->regs[i].value; + new_val &= ~reg->mask; + new_val |= reg->value; if (new_val != block->reg[id]) { block->reg[id] = new_val; dirty |= R600_BLOCK_STATUS_DIRTY; @@ -929,105 +1066,120 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat if (block->pm4_bo_index[id]) { /* find relocation */ reloc_id = block->pm4_bo_index[id]; - r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, state->regs[i].bo); - state->regs[i].bo->fence = ctx->radeon->fence; + r600_bo_reference(&block->reloc[reloc_id].bo, reg->bo); + block->reloc[reloc_id].bo_usage = reg->bo_usage; /* always force dirty for relocs for now */ dirty |= R600_BLOCK_STATUS_DIRTY; } - r600_context_dirty_block(ctx, block, dirty, id); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, id); } } -void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +static void r600_context_dirty_resource_block(struct r600_context *ctx, + struct r600_block *block, + int dirty, int index) +{ + block->nreg_dirty = index + 1; + + if ((dirty != (block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { + block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; + 
ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); + } + LIST_ADDTAIL(&block->list,&ctx->resource_dirty); + } +} + +void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, struct r600_block *block) { - struct r600_range *range; - struct r600_block *block; - int i; int dirty; int num_regs = ctx->radeon->chip_class >= EVERGREEN ? 8 : 7; + boolean is_vertex; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; if (state == NULL) { - block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); - r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); + block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY); + if (block->reloc[1].bo) + block->reloc[1].bo->binding &= ~BO_BOUND_TEXTURE; + + r600_bo_reference(&block->reloc[1].bo, NULL); + r600_bo_reference(&block->reloc[2].bo, NULL); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } - dirty = block->status & R600_BLOCK_STATUS_DIRTY; + is_vertex = ((state->val[num_regs-1] & 0xc0000000) == 0xc0000000); + dirty = block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY; - for (i = 0; i < num_regs; i++) { - if (block->reg[i] != state->regs[i].value) { - dirty |= R600_BLOCK_STATUS_DIRTY; - block->reg[i] = state->regs[i].value; - } + if (memcmp(block->reg, state->val, num_regs*4)) { + memcpy(block->reg, state->val, num_regs * 4); + dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } /* if no BOs on block, force dirty */ if (!block->reloc[1].bo || !block->reloc[2].bo) - dirty |= R600_BLOCK_STATUS_DIRTY; + dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; if (!dirty) { - if (state->regs[0].bo) { - if ((block->reloc[1].bo->bo->handle != state->regs[0].bo->bo->handle) || - (block->reloc[2].bo->bo->handle != state->regs[0].bo->bo->handle)) - dirty |= R600_BLOCK_STATUS_DIRTY; + if (is_vertex) { + if (block->reloc[1].bo->buf != state->bo[0]->buf) + dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } else { - if ((block->reloc[1].bo->bo->handle != state->regs[2].bo->bo->handle) || - (block->reloc[2].bo->bo->handle != state->regs[3].bo->bo->handle)) - dirty |= R600_BLOCK_STATUS_DIRTY; + if ((block->reloc[1].bo->buf != state->bo[0]->buf) || + (block->reloc[2].bo->buf != state->bo[1]->buf)) + dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } } - if (!dirty) { - if (state->regs[0].bo) - state->regs[0].bo->fence = ctx->radeon->fence; - else { - state->regs[2].bo->fence = ctx->radeon->fence; - state->regs[3].bo->fence = ctx->radeon->fence; - } - } else { - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); - if (state->regs[0].bo) { + + if (dirty) { + if (is_vertex) { /* VERTEX RESOURCE, we preted there is 2 bo to relocate so * we have single case btw VERTEX & TEXTURE resource */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo); - state->regs[0].bo->fence = ctx->radeon->fence; + r600_bo_reference(&block->reloc[1].bo, state->bo[0]); + block->reloc[1].bo_usage = state->bo_usage[0]; + r600_bo_reference(&block->reloc[2].bo, NULL); } else { /* TEXTURE RESOURCE */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); - 
r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo); - state->regs[2].bo->fence = ctx->radeon->fence; - state->regs[3].bo->fence = ctx->radeon->fence; + r600_bo_reference(&block->reloc[1].bo, state->bo[0]); + block->reloc[1].bo_usage = state->bo_usage[0]; + r600_bo_reference(&block->reloc[2].bo, state->bo[1]); + block->reloc[2].bo_usage = state->bo_usage[1]; + state->bo[0]->binding |= BO_BOUND_TEXTURE; } + + if (is_vertex) + block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX; + else + block->status &= ~R600_BLOCK_STATUS_RESOURCE_VERTEX; + + r600_context_dirty_resource_block(ctx, block, dirty, num_regs - 1); } - r600_context_dirty_block(ctx, block, dirty, num_regs - 1); } -void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1C * rid; + struct r600_block *block = ctx->ps_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } -void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1180 + 0x1C * rid; + struct r600_block *block = ctx->vs_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } -void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x2300 + 0x1C * rid; + struct r600_block *block = ctx->fs_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) @@ -1037,11 +1189,12 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, int i; int dirty; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + range = &ctx->range[CTX_RANGE_ID(offset)]; + block = range->blocks[CTX_BLOCK_ID(offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } dirty = block->status & R600_BLOCK_STATUS_DIRTY; @@ -1052,19 +1205,10 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, } } - r600_context_dirty_block(ctx, block, dirty, 2); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, 2); } -static inline void r600_context_ps_partial_flush(struct r600_context *ctx) -{ - if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) - return; - - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - - ctx->flags &= ~R600_CONTEXT_DRAW_PENDING; -} static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned 
offset) { @@ -1073,11 +1217,12 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex int i; int dirty; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + range = &ctx->range[CTX_RANGE_ID(offset)]; + block = range->blocks[CTX_BLOCK_ID(offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } if (state->nregs <= 3) { @@ -1096,8 +1241,8 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex * will end up using the new border color. */ if (dirty & R600_BLOCK_STATUS_DIRTY) r600_context_ps_partial_flush(ctx); - - r600_context_dirty_block(ctx, block, dirty, 3); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, 3); } void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) @@ -1126,8 +1271,8 @@ struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) struct r600_block *block; unsigned id; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + range = &ctx->range[CTX_RANGE_ID(offset)]; + block = range->blocks[CTX_BLOCK_ID(offset)]; offset -= block->start_offset; id = block->pm4_bo_index[offset >> 2]; if (block->reloc[id].bo) { @@ -1138,39 +1283,52 @@ struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block) { - int id; + int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); + int cp_dwords = block->pm4_ndwords, start_dword = 0; + int new_dwords = 0; + int nbo = block->nbo; - if (block->nreg_dirty == 0 && block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS)) { + if (block->nreg_dirty == 0 && optional) { goto out; } - ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; - for (int j = 0; j < block->nreg; j++) { - if (block->pm4_bo_index[j]) { - /* find relocation */ - id = block->pm4_bo_index[j]; - r600_context_bo_reloc(ctx, - &block->pm4[block->reloc[id].bo_pm4_index], - block->reloc[id].bo); - r600_context_bo_flush(ctx, - block->reloc[id].flush_flags, - block->reloc[id].flush_mask, - block->reloc[id].bo); + if (nbo) { + ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; + + for (int j = 0; j < block->nreg; j++) { + if (block->pm4_bo_index[j]) { + /* find relocation */ + struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; + block->pm4[reloc->bo_pm4_index] = + r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); + r600_context_bo_flush(ctx, + reloc->flush_flags, + reloc->flush_mask, + reloc->bo); + nbo--; + if (nbo == 0) + break; + } } + ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; } - ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; - memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4); - ctx->pm4_cdwords += block->pm4_ndwords; - - if (block->nreg_dirty != block->nreg && block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS)) { - int new_dwords = block->nreg_dirty; - uint32_t oldword, newword; - ctx->pm4_cdwords -= block->pm4_ndwords; - newword = oldword = ctx->pm4[ctx->pm4_cdwords]; + + optional &= (block->nreg_dirty != block->nreg); + if (optional) { + new_dwords = block->nreg_dirty; + start_dword = ctx->pm4_cdwords; + cp_dwords = new_dwords + 2; + } + memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, cp_dwords * 4); + ctx->pm4_cdwords += cp_dwords; + + if (optional) { 
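+		/* Only the first nreg_dirty registers were copied above, but
+		 * the prebuilt PKT3 header still encodes the block's full
+		 * register count; patch the count field in place so the CP
+		 * consumes just the dirty prefix. */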
+ uint32_t newword; + + newword = ctx->pm4[start_dword]; newword &= PKT_COUNT_C; newword |= PKT_COUNT_S(new_dwords); - ctx->pm4[ctx->pm4_cdwords] = newword; - ctx->pm4_cdwords += new_dwords + 2; + ctx->pm4[start_dword] = newword; } out: block->status ^= R600_BLOCK_STATUS_DIRTY; @@ -1178,6 +1336,40 @@ out: LIST_DELINIT(&block->list); } +void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block) +{ + int cp_dwords = block->pm4_ndwords; + int nbo = block->nbo; + + ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; + + if (block->status & R600_BLOCK_STATUS_RESOURCE_VERTEX) { + nbo = 1; + cp_dwords -= 2; /* don't copy the second NOP */ + } + + for (int j = 0; j < nbo; j++) { + if (block->pm4_bo_index[j]) { + /* find relocation */ + struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; + block->pm4[reloc->bo_pm4_index] = + r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); + r600_context_bo_flush(ctx, + reloc->flush_flags, + reloc->flush_mask, + reloc->bo); + } + } + ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; + + memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, cp_dwords * 4); + ctx->pm4_cdwords += cp_dwords; + + block->status ^= R600_BLOCK_STATUS_RESOURCE_DIRTY; + block->nreg_dirty = 0; + LIST_DELINIT(&block->list); +} + void r600_context_flush_dest_caches(struct r600_context *ctx) { struct r600_bo *cb[8]; @@ -1209,7 +1401,7 @@ void r600_context_flush_dest_caches(struct r600_context *ctx) 0, cb[i]); } if (db) { - r600_context_bo_flush(ctx, S_0085F0_DB_ACTION_ENA(1), 0, db); + r600_context_bo_flush(ctx, S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1), 0, db); } ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; @@ -1220,13 +1412,10 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) unsigned ndwords = 7; struct r600_block *dirty_block = NULL; struct r600_block *next_block; + uint32_t *pm4; if (draw->indices) { ndwords = 11; - /* make sure there is enough relocation space before scheduling draw */ - if (ctx->creloc >= (ctx->nreloc - 1)) { - r600_context_flush(ctx); - } } /* queries need some special values */ @@ -1245,11 +1434,11 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) /* update the max dword count to make sure we have enough space * reserved for flushing the destination caches */ - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16; + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16; if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { @@ -1261,25 +1450,31 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) r600_context_block_emit_dirty(ctx, dirty_block); } + LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->resource_dirty, list) { + r600_context_block_resource_emit_dirty(ctx, dirty_block); + } + /* draw packet */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; + pm4 = &ctx->pm4[ctx->pm4_cdwords]; + + pm4[0] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); 
+ pm4[1] = draw->vgt_index_type; + pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); + pm4[3] = draw->vgt_num_instances; if (draw->indices) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); + pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); + pm4[5] = draw->indices_bo_offset; + pm4[6] = 0; + pm4[7] = draw->vgt_num_indices; + pm4[8] = draw->vgt_draw_initiator; + pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); + pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ); } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); + pm4[5] = draw->vgt_num_indices; + pm4[6] = draw->vgt_draw_initiator; } + ctx->pm4_cdwords += ndwords; ctx->flags |= (R600_CONTEXT_DST_CACHES_DIRTY | R600_CONTEXT_DRAW_PENDING); @@ -1287,21 +1482,17 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) ctx->pm4_dirty_cdwords = 0; } -void r600_context_flush(struct r600_context *ctx) +void r600_context_flush(struct r600_context *ctx, unsigned flags) { - struct drm_radeon_cs drmib = {}; - struct drm_radeon_cs_chunk chunks[2]; - uint64_t chunk_array[2]; - unsigned fence; - int r; + struct r600_block *enable_block = NULL; - if (!ctx->pm4_cdwords) + if (ctx->pm4_cdwords == ctx->init_dwords) return; /* suspend queries */ r600_context_queries_suspend(ctx); - if (ctx->radeon->family >= CHIP_CEDAR) + if (ctx->radeon->chip_class >= EVERGREEN) evergreen_context_flush_dest_caches(ctx); else r600_context_flush_dest_caches(ctx); @@ -1309,71 +1500,48 @@ void r600_context_flush(struct r600_context *ctx) /* partial flush is needed to avoid lockups on some chips with user fences */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - /* emit fence */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); - ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); - ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence; - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo); - -#if 1 - /* emit cs */ - drmib.num_chunks = 2; - drmib.chunks = (uint64_t)(uintptr_t)chunk_array; - chunks[0].chunk_id = RADEON_CHUNK_ID_IB; - chunks[0].length_dw = ctx->pm4_cdwords; - chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4; - chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; - chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4; - chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc; - chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0]; - chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1]; - r = 
drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, - sizeof(struct drm_radeon_cs)); -#else - *ctx->radeon->cfence = ctx->radeon->fence; -#endif - - r600_context_update_fenced_list(ctx); - - fence = ctx->radeon->fence + 1; - if (fence < ctx->radeon->fence) { - /* wrap around */ - fence = 1; - r600_context_fence_wraparound(ctx, fence); - } - ctx->radeon->fence = fence; + + /* Flush the CS. */ + ctx->cs->cdw = ctx->pm4_cdwords; + ctx->radeon->ws->cs_flush(ctx->cs, flags); + + /* We need to get the pointer to the other CS, + * the command streams are double-buffered. */ + ctx->pm4 = ctx->cs->buf; /* restart */ for (int i = 0; i < ctx->creloc; i++) { - ctx->bo[i]->reloc = NULL; ctx->bo[i]->last_flush = 0; - radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL); + r600_bo_reference(&ctx->bo[i], NULL); } ctx->creloc = 0; ctx->pm4_dirty_cdwords = 0; ctx->pm4_cdwords = 0; ctx->flags = 0; + r600_init_cs(ctx); + /* resume queries */ - r600_context_queries_resume(ctx); + r600_context_queries_resume(ctx, TRUE); /* set all valid group as dirty so they get reemited on * next draw command */ - for (int i = 0; i < ctx->nblocks; i++) { - if (ctx->blocks[i]->status & R600_BLOCK_STATUS_ENABLED) { - if(!(ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY)) { - LIST_ADDTAIL(&ctx->blocks[i]->list,&ctx->dirty); + LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) { + if (!(enable_block->flags & BLOCK_FLAG_RESOURCE)) { + if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) { + LIST_ADDTAIL(&enable_block->list,&ctx->dirty); + enable_block->status |= R600_BLOCK_STATUS_DIRTY; + } + } else { + if(!(enable_block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) { + LIST_ADDTAIL(&enable_block->list,&ctx->resource_dirty); + enable_block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } - ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords; - ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY; - ctx->blocks[i]->nreg_dirty = ctx->blocks[i]->nreg; } + ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords + + enable_block->pm4_flush_ndwords; + enable_block->nreg_dirty = enable_block->nreg; } } @@ -1381,10 +1549,9 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, { unsigned ndwords = 10; - if (((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) || - (ctx->creloc >= (ctx->nreloc - 1))) { + if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); @@ -1396,125 +1563,47 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, ctx->pm4[ctx->pm4_cdwords++] = value; /* DATA_LO */ ctx->pm4[ctx->pm4_cdwords++] = 0; /* DATA_HI */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], fence_bo); -} - -void r600_context_dump_bof(struct r600_context *ctx, const char *file) -{ - bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root; - unsigned i; - - root = device_id = bcs = blob = array = bo = size = handle = NULL; - root = bof_object(); - if (root == NULL) - goto out_err; - device_id = bof_int32(ctx->radeon->device); - if (device_id == NULL) - goto out_err; - if (bof_object_set(root, "device_id", device_id)) - goto out_err; - bof_decref(device_id); - device_id = NULL; - /* dump relocs */ - blob = bof_blob(ctx->creloc * 16, 
ctx->reloc); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "reloc", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump cs */ - blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "pm4", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump bo */ - array = bof_array(); - if (array == NULL) - goto out_err; - for (i = 0; i < ctx->creloc; i++) { - struct radeon_bo *rbo = ctx->bo[i]; - bo = bof_object(); - if (bo == NULL) - goto out_err; - size = bof_int32(rbo->size); - if (size == NULL) - goto out_err; - if (bof_object_set(bo, "size", size)) - goto out_err; - bof_decref(size); - size = NULL; - handle = bof_int32(rbo->handle); - if (handle == NULL) - goto out_err; - if (bof_object_set(bo, "handle", handle)) - goto out_err; - bof_decref(handle); - handle = NULL; - radeon_bo_map(ctx->radeon, rbo); - blob = bof_blob(rbo->size, rbo->data); - radeon_bo_unmap(ctx->radeon, rbo); - if (blob == NULL) - goto out_err; - if (bof_object_set(bo, "data", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - if (bof_array_append(array, bo)) - goto out_err; - bof_decref(bo); - bo = NULL; - } - if (bof_object_set(root, "bo", array)) - goto out_err; - bof_dump_file(root, file); -out_err: - bof_decref(blob); - bof_decref(array); - bof_decref(bo); - bof_decref(size); - bof_decref(handle); - bof_decref(device_id); - bof_decref(root); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE); } static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait) { + unsigned results_base = query->results_start; u64 start, end; - u32 *results; - int i; - int size; + u32 *results, *current_result; if (wait) - results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_CPU_READ, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_READ); else - results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_READ, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ); if (!results) return FALSE; - size = query->num_results * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? 
ctx->max_db : 1);
-	for (i = 0; i < size; i += 4) {
-		start = (u64)results[i] | (u64)results[i + 1] << 32;
-		end = (u64)results[i + 2] | (u64)results[i + 3] << 32;
+
+	/* count all results across all data blocks */
+	while (results_base != query->results_end) {
+		current_result = (u32*)((char*)results + results_base);
+
+		start = (u64)current_result[0] | (u64)current_result[1] << 32;
+		end = (u64)current_result[2] | (u64)current_result[3] << 32;
 		if (((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))
 		    || query->type == PIPE_QUERY_TIME_ELAPSED) {
 			query->result += end - start;
 		}
+
+		results_base += 4 * 4;
+		if (results_base >= query->buffer_size)
+			results_base = 0;
 	}
-	r600_bo_unmap(ctx->radeon, query->buffer);
-	query->num_results = 0;
+	query->results_start = query->results_end;
+	r600_bo_unmap(ctx->radeon, query->buffer);

 	return TRUE;
 }

 void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 {
-	unsigned required_space;
-	int num_backends = r600_get_num_backends(ctx->radeon);
+	unsigned required_space, new_results_end;

 	/* query request needs 6/8 dwords for begin + 6/8 dwords for end */
 	if (query->type == PIPE_QUERY_TIME_ELAPSED)
@@ -1524,50 +1613,68 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)

 	if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
+	}
+
+	if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+		/* Count queries emitted without flushes, and flush if more than
+		 * half of buffer used, to avoid overwriting results which may be
+		 * still in use. */
+		if (query->state & R600_QUERY_STATE_FLUSHED) {
+			query->queries_emitted = 1;
+		} else {
+			if (++query->queries_emitted > query->buffer_size / query->result_size / 2)
+				r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
+		}
 	}

-	/* if query buffer is full force a flush */
-	if (query->num_results*4 >= query->buffer_size - 16) {
-		r600_context_flush(ctx);
+	new_results_end = query->results_end + query->result_size;
+	if (new_results_end >= query->buffer_size)
+		new_results_end = 0;
+
+	/* collect current results if query buffer is full */
+	if (new_results_end == query->results_start) {
+		if (!(query->state & R600_QUERY_STATE_FLUSHED))
+			r600_context_flush(ctx, 0);
 		r600_query_result(ctx, query, TRUE);
 	}

-	if (query->type == PIPE_QUERY_OCCLUSION_COUNTER &&
-	    num_backends > 0 && num_backends < ctx->max_db) {
-		/* as per info on ZPASS the driver must set the unusued DB top bits */
+	if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
 		u32 *results;
 		int i;

-		results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_WRITE, NULL);
+		results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_WRITE);
 		if (results) {
-			memset(results + (query->num_results * 4), 0, ctx->max_db * 4 * 4);
-
-			for (i = num_backends; i < ctx->max_db; i++) {
-				results[(i * 4)+1] = 0x80000000;
-				results[(i * 4)+3] = 0x80000000;
+			results = (u32*)((char*)results + query->results_end);
+			memset(results, 0, query->result_size);
+
+			/* Set top bits for unused backends */
+			for (i = 0; i < ctx->max_db; i++) {
+				if (!(ctx->backend_mask & (1<<i))) {
+					results[(i * 4)+1] = 0x80000000;
+					results[(i * 4)+3] = 0x80000000;
+				}
 			}
+			r600_bo_unmap(ctx->radeon, query->buffer);
 		}
 	}
-
+
 	/* emit begin query */
 	if (query->type == PIPE_QUERY_TIME_ELAPSED) {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-		ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 
r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end; ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end; ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); query->state |= R600_QUERY_STATE_STARTED; query->state ^= R600_QUERY_STATE_ENDED; @@ -1576,49 +1683,74 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) void r600_query_end(struct r600_context *ctx, struct r600_query *query) { - /* emit begin query */ + /* emit end query */ if (query->type == PIPE_QUERY_TIME_ELAPSED) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8; ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8; ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); + + query->results_end += query->result_size; + if (query->results_end >= query->buffer_size) + query->results_end = 0; - query->num_results += 4 * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->max_db : 1); query->state ^= R600_QUERY_STATE_STARTED; query->state |= R600_QUERY_STATE_ENDED; + query->state &= ~R600_QUERY_STATE_FLUSHED; + ctx->num_query_running--; } void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, int flag_wait) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); - if (operation == PREDICATION_OP_CLEAR) { + if (ctx->pm4_cdwords + 3 > ctx->pm4_ndwords) + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); + + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(PREDICATION_OP_CLEAR); } else { - int results_base = query->num_results - (4 * ctx->max_db); - - if (results_base < 0) - results_base = 0; - - ctx->pm4[ctx->pm4_cdwords++] = results_base*4 + r600_bo_offset(query->buffer); - ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(operation) | (flag_wait ? 
PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW) | PREDICATION_DRAW_VISIBLE; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + unsigned results_base = query->results_start; + unsigned count; + u32 op; + + /* find count of the query data blocks */ + count = query->buffer_size + query->results_end - query->results_start; + if (count >= query->buffer_size) count-=query->buffer_size; + count /= query->result_size; + + if (ctx->pm4_cdwords + 5 * count > ctx->pm4_ndwords) + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); + + op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | + (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); + + /* emit predicate packets for all data blocks */ + while (results_base != query->results_end) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); + ctx->pm4[ctx->pm4_cdwords++] = results_base; + ctx->pm4[ctx->pm4_cdwords++] = op; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, + RADEON_USAGE_READ); + results_base += query->result_size; + if (results_base >= query->buffer_size) + results_base = 0; + /* set CONTINUE bit for all packets except the first */ + op |= PREDICATION_CONTINUE; + } } } @@ -1636,6 +1768,14 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query->type = query_type; query->buffer_size = 4096; + if (query_type == PIPE_QUERY_OCCLUSION_COUNTER) + query->result_size = 4 * 4 * ctx->max_db; + else + query->result_size = 4 * 4; + + /* adjust buffer size to simplify offsets wrapping math */ + query->buffer_size -= query->buffer_size % query->result_size; + /* As of GL4, query buffers are normally read by the CPU after * being written by the gpu, hence staging is probably a good * usage pattern. @@ -1654,7 +1794,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) { - r600_bo_reference(ctx->radeon, &query->buffer, NULL); + r600_bo_reference(&query->buffer, NULL); LIST_DELINIT(&query->list); free(query); } @@ -1665,8 +1805,8 @@ boolean r600_context_query_result(struct r600_context *ctx, { uint64_t *result = (uint64_t*)vresult; - if (query->num_results) { - r600_context_flush(ctx); + if (!(query->state & R600_QUERY_STATE_FLUSHED)) { + r600_context_flush(ctx, 0); } if (!r600_query_result(ctx, query, wait)) return FALSE; @@ -1690,11 +1830,14 @@ void r600_context_queries_suspend(struct r600_context *ctx) } } -void r600_context_queries_resume(struct r600_context *ctx) +void r600_context_queries_resume(struct r600_context *ctx, boolean flushed) { struct r600_query *query; LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) { + if (flushed) + query->state |= R600_QUERY_STATE_FLUSHED; + if (query->state & R600_QUERY_STATE_SUSPENDED) { r600_query_begin(ctx, query); query->state ^= R600_QUERY_STATE_SUSPENDED;
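
Two standalone sketches follow, as review annotations of the arithmetic this patch introduces. Both are illustrative only: decode_backend_mask(), struct query_ring, and their helpers are names invented for the sketch rather than driver code; only the constants and expressions mirror the diff above.

The first mirrors the kernel-assisted path of r600_get_backend_mask(): backend_map packs, per tile pipe, the id of the backend that serves it (4-bit fields on Evergreen, 2-bit fields on R600/R700); the loop folds those ids into a bitmask, and the err: fallback sets the num_backends lowest bits instead.

#include <assert.h>

/* Sketch of the backend_map decode in r600_get_backend_mask(); not driver
 * code. Each field of backend_map holds the id of the backend serving one
 * tile pipe: 4 bits wide on Evergreen (mask 0x7), 2 bits wide on R600/R700
 * (mask 0x3). */
static unsigned decode_backend_mask(unsigned backend_map,
				    unsigned num_tile_pipes,
				    int is_evergreen)
{
	unsigned item_width = is_evergreen ? 4 : 2;
	unsigned item_mask = is_evergreen ? 0x7 : 0x3;
	unsigned mask = 0;

	while (num_tile_pipes--) {
		mask |= 1u << (backend_map & item_mask);
		backend_map >>= item_width;
	}
	return mask;
}

int main(void)
{
	/* R700-style map 0x44 = 2-bit fields 00,01,00,01: four pipes
	 * alternating between backends 0 and 1, hence mask 0x3. */
	assert(decode_backend_mask(0x44, 4, 0) == 0x3);

	/* The err: fallback - a mask with the num_backends lowest bits set. */
	unsigned num_backends = 2;
	assert(~0u >> (32 - num_backends) == 0x3);
	return 0;
}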
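The second mirrors the wrap-around bookkeeping that replaces num_results: r600_query_begin()/r600_query_end() advance results_end through a buffer trimmed to a multiple of result_size, r600_query_result() drains from results_start, and r600_query_predication() derives the number of pending result blocks from the two offsets.

#include <assert.h>

/* Sketch of the query result ring used by the new query code; the struct
 * and helpers are invented for illustration, the arithmetic mirrors the
 * patch. */
struct query_ring {
	unsigned buffer_size;	/* bytes, trimmed to a multiple of result_size */
	unsigned result_size;	/* bytes per begin/end result pair */
	unsigned results_start;	/* oldest result not yet read back */
	unsigned results_end;	/* where the next result will be written */
};

/* Offset the next result would take, as in r600_query_begin(). */
static unsigned ring_next_end(const struct query_ring *q)
{
	unsigned next = q->results_end + q->result_size;
	return next >= q->buffer_size ? 0 : next;
}

/* Number of pending result blocks, as in r600_query_predication(). */
static unsigned ring_count(const struct query_ring *q)
{
	unsigned count = q->buffer_size + q->results_end - q->results_start;
	if (count >= q->buffer_size)
		count -= q->buffer_size;
	return count / q->result_size;
}

int main(void)
{
	struct query_ring q = { 4096, 16, 4080, 0 }; /* end has wrapped to 0 */

	assert(ring_count(&q) == 1);	/* one block pending, at offset 4080 */
	assert(ring_next_end(&q) == 16);

	q.results_end = ring_next_end(&q);
	assert(ring_count(&q) == 2);	/* blocks at offsets 4080 and 0 */
	return 0;
}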