* Jerome Glisse
*/
#include "r600_hw_context_priv.h"
+#include "radeonsi_pm4.h"
#include "radeonsi_pipe.h"
#include "sid.h"
#include "util/u_memory.h"
#define GROUP_FORCE_NEW_BLOCK 0
/* Get backends mask */
-void r600_get_backend_mask(struct r600_context *ctx)
+void si_get_backend_mask(struct r600_context *ctx)
{
struct radeon_winsys_cs *cs = ctx->cs;
- struct r600_resource *buffer;
+ struct si_resource *buffer;
uint32_t *results;
unsigned num_backends = ctx->screen->info.r600_num_backends;
unsigned i, mask = 0;
if (ctx->screen->info.r600_backend_map_valid) {
unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
unsigned backend_map = ctx->screen->info.r600_backend_map;
- unsigned item_width, item_mask;
-
- if (ctx->chip_class >= CAYMAN) {
- item_width = 4;
- item_mask = 0x7;
- }
+ unsigned item_width = 4, item_mask = 0x7;
while(num_tile_pipes--) {
i = backend_map & item_mask;
/* otherwise backup path for older kernels */
/* create buffer for event data */
- buffer = (struct r600_resource*)
- pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM,
- PIPE_USAGE_STAGING, ctx->max_db*16);
+ buffer = si_resource_create_custom(&ctx->screen->screen,
+ PIPE_USAGE_STAGING,
+ ctx->max_db*16);
if (!buffer)
goto err;
}
}
- pipe_resource_reference((struct pipe_resource**)&buffer, NULL);
+ si_resource_reference(&buffer, NULL);
if (mask != 0) {
ctx->backend_mask = mask;
return;
}
-static inline void r600_context_ps_partial_flush(struct r600_context *ctx)
-{
- struct radeon_winsys_cs *cs = ctx->cs;
-
- if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING))
- return;
-
- cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
- cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
-
- ctx->flags &= ~R600_CONTEXT_DRAW_PENDING;
-}
-
-void r600_init_cs(struct r600_context *ctx)
-{
- struct radeon_winsys_cs *cs = ctx->cs;
-
- /* All asics require this one */
- cs->buf[cs->cdw++] = PKT3(PKT3_CONTEXT_CONTROL, 1, 0);
- cs->buf[cs->cdw++] = 0x80000000;
- cs->buf[cs->cdw++] = 0x80000000;
-
- ctx->init_dwords = cs->cdw;
-}
-
-static void r600_init_block(struct r600_context *ctx,
- struct r600_block *block,
- const struct r600_reg *reg, int index, int nreg,
- unsigned opcode, unsigned offset_base)
-{
- int i = index;
- int j, n = nreg;
-
- /* initialize block */
- block->flags = 0;
- block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */
- block->start_offset = reg[i].offset;
- block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0);
- block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2;
- block->reg = &block->pm4[block->pm4_ndwords];
- block->pm4_ndwords += n;
- block->nreg = n;
- block->nreg_dirty = n;
- LIST_INITHEAD(&block->list);
- LIST_INITHEAD(&block->enable_list);
-
- for (j = 0; j < n; j++) {
- if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) {
- block->flags |= REG_FLAG_DIRTY_ALWAYS;
- }
- if (reg[i+j].flags & REG_FLAG_ENABLE_ALWAYS) {
- if (!(block->status & R600_BLOCK_STATUS_ENABLED)) {
- block->status |= R600_BLOCK_STATUS_ENABLED;
- LIST_ADDTAIL(&block->enable_list, &ctx->enable_list);
- LIST_ADDTAIL(&block->list,&ctx->dirty);
- }
- }
- if (reg[i+j].flags & REG_FLAG_FLUSH_CHANGE) {
- block->flags |= REG_FLAG_FLUSH_CHANGE;
- }
-
- if (reg[i+j].flags & REG_FLAG_NEED_BO) {
- block->nbo++;
- assert(block->nbo < R600_BLOCK_MAX_BO);
- block->pm4_bo_index[j] = block->nbo;
- block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0);
- block->pm4[block->pm4_ndwords++] = 0x00000000;
- block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1;
- }
- }
- /* check that we stay in limit */
- assert(block->pm4_ndwords < R600_BLOCK_MAX_REG);
-}
-
-int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg,
- unsigned opcode, unsigned offset_base)
-{
- struct r600_block *block;
- struct r600_range *range;
- int offset;
-
- for (unsigned i = 0, n = 0; i < nreg; i += n) {
- /* ignore new block balise */
- if (reg[i].offset == GROUP_FORCE_NEW_BLOCK) {
- n = 1;
- continue;
- }
-
- /* register that need relocation are in their own group */
- /* find number of consecutive registers */
- n = 0;
- offset = reg[i].offset;
- while (reg[i + n].offset == offset) {
- n++;
- offset += 4;
- if ((n + i) >= nreg)
- break;
- if (n >= (R600_BLOCK_MAX_REG - 2))
- break;
- }
-
- /* allocate new block */
- block = calloc(1, sizeof(struct r600_block));
- if (block == NULL) {
- return -ENOMEM;
- }
- ctx->nblocks++;
- for (int j = 0; j < n; j++) {
- range = &ctx->range[CTX_RANGE_ID(reg[i + j].offset)];
- /* create block table if it doesn't exist */
- if (!range->blocks)
- range->blocks = calloc(1 << HASH_SHIFT, sizeof(void *));
- if (!range->blocks)
- return -1;
-
- range->blocks[CTX_BLOCK_ID(reg[i + j].offset)] = block;
- }
-
- r600_init_block(ctx, block, reg, i, n, opcode, offset_base);
-
- }
- return 0;
-}
-
-
/* initialize */
-void r600_context_fini(struct r600_context *ctx)
-{
- struct r600_block *block;
- struct r600_range *range;
-
- for (int i = 0; i < NUM_RANGES; i++) {
- if (!ctx->range[i].blocks)
- continue;
- for (int j = 0; j < (1 << HASH_SHIFT); j++) {
- block = ctx->range[i].blocks[j];
- if (block) {
- for (int k = 0, offset = block->start_offset; k < block->nreg; k++, offset += 4) {
- range = &ctx->range[CTX_RANGE_ID(offset)];
- range->blocks[CTX_BLOCK_ID(offset)] = NULL;
- }
- for (int k = 1; k <= block->nbo; k++) {
- pipe_resource_reference((struct pipe_resource**)&block->reloc[k].bo, NULL);
- }
- free(block);
- }
- }
- free(ctx->range[i].blocks);
- }
- free(ctx->range);
- free(ctx->blocks);
- ctx->ws->cs_destroy(ctx->cs);
-}
-
-int r600_setup_block_table(struct r600_context *ctx)
-{
- /* setup block table */
- int c = 0;
- ctx->blocks = calloc(ctx->nblocks, sizeof(void*));
- if (!ctx->blocks)
- return -ENOMEM;
- for (int i = 0; i < NUM_RANGES; i++) {
- if (!ctx->range[i].blocks)
- continue;
- for (int j = 0, add; j < (1 << HASH_SHIFT); j++) {
- if (!ctx->range[i].blocks[j])
- continue;
-
- add = 1;
- for (int k = 0; k < c; k++) {
- if (ctx->blocks[k] == ctx->range[i].blocks[j]) {
- add = 0;
- break;
- }
- }
- if (add) {
- assert(c < ctx->nblocks);
- ctx->blocks[c++] = ctx->range[i].blocks[j];
- j += (ctx->range[i].blocks[j]->nreg) - 1;
- }
- }
- }
-
- return 0;
-}
-
-void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
+void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
boolean count_draw_in)
{
- struct r600_atom *state;
-
/* The number of dwords we already used in the CS so far. */
num_dw += ctx->cs->cdw;
if (count_draw_in) {
/* The number of dwords all the dirty states would take. */
- LIST_FOR_EACH_ENTRY(state, &ctx->dirty_states, head) {
- num_dw += state->num_dw;
- }
-
num_dw += ctx->pm4_dirty_cdwords;
/* The upper-bound of how much a draw command would take. */
/* Save 16 dwords for the fence mechanism. */
num_dw += 16;
+#if R600_TRACE_CS
+ if (ctx->screen->trace_bo) {
+ num_dw += R600_TRACE_CS_DWORDS;
+ }
+#endif
+
/* Flush if there's not enough space. */
if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
}
}
-void r600_context_dirty_block(struct r600_context *ctx,
- struct r600_block *block,
- int dirty, int index)
-{
- if ((index + 1) > block->nreg_dirty)
- block->nreg_dirty = index + 1;
-
- if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) {
- block->status |= R600_BLOCK_STATUS_DIRTY;
- ctx->pm4_dirty_cdwords += block->pm4_ndwords;
- if (!(block->status & R600_BLOCK_STATUS_ENABLED)) {
- block->status |= R600_BLOCK_STATUS_ENABLED;
- LIST_ADDTAIL(&block->enable_list, &ctx->enable_list);
- }
- LIST_ADDTAIL(&block->list,&ctx->dirty);
-
- if (block->flags & REG_FLAG_FLUSH_CHANGE) {
- r600_context_ps_partial_flush(ctx);
- }
- }
-}
-
-void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state)
-{
- struct r600_block *block;
- int dirty;
- for (int i = 0; i < state->nregs; i++) {
- unsigned id, reloc_id;
- struct r600_pipe_reg *reg = &state->regs[i];
-
- block = reg->block;
- id = reg->id;
-
- dirty = block->status & R600_BLOCK_STATUS_DIRTY;
-
- if (reg->value != block->reg[id]) {
- block->reg[id] = reg->value;
- dirty |= R600_BLOCK_STATUS_DIRTY;
- }
- if (block->flags & REG_FLAG_DIRTY_ALWAYS)
- dirty |= R600_BLOCK_STATUS_DIRTY;
- if (block->pm4_bo_index[id]) {
- /* find relocation */
- reloc_id = block->pm4_bo_index[id];
- pipe_resource_reference((struct pipe_resource**)&block->reloc[reloc_id].bo, ®->bo->b.b);
- block->reloc[reloc_id].bo_usage = reg->bo_usage;
- /* always force dirty for relocs for now */
- dirty |= R600_BLOCK_STATUS_DIRTY;
- }
-
- if (dirty)
- r600_context_dirty_block(ctx, block, dirty, id);
- }
-}
-
-struct r600_resource *r600_context_reg_bo(struct r600_context *ctx, unsigned offset)
-{
- struct r600_range *range;
- struct r600_block *block;
- unsigned id;
-
- range = &ctx->range[CTX_RANGE_ID(offset)];
- block = range->blocks[CTX_BLOCK_ID(offset)];
- offset -= block->start_offset;
- id = block->pm4_bo_index[offset >> 2];
- if (block->reloc[id].bo) {
- return block->reloc[id].bo;
- }
- return NULL;
-}
-
-void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block)
-{
- struct radeon_winsys_cs *cs = ctx->cs;
- int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS);
- int cp_dwords = block->pm4_ndwords, start_dword = 0;
- int new_dwords = 0;
- int nbo = block->nbo;
-
- if (block->nreg_dirty == 0 && optional) {
- goto out;
- }
-
- if (nbo) {
- ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH;
-
- for (int j = 0; j < block->nreg; j++) {
- if (block->pm4_bo_index[j]) {
- /* find relocation */
- struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
- block->pm4[reloc->bo_pm4_index] =
- r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
- nbo--;
- if (nbo == 0)
- break;
- }
- }
- ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
- }
-
- optional &= (block->nreg_dirty != block->nreg);
- if (optional) {
- new_dwords = block->nreg_dirty;
- start_dword = cs->cdw;
- cp_dwords = new_dwords + 2;
- }
- memcpy(&cs->buf[cs->cdw], block->pm4, cp_dwords * 4);
- cs->cdw += cp_dwords;
-
- if (optional) {
- uint32_t newword;
-
- newword = cs->buf[start_dword];
- newword &= PKT_COUNT_C;
- newword |= PKT_COUNT_S(new_dwords);
- cs->buf[start_dword] = newword;
- }
-out:
- block->status ^= R600_BLOCK_STATUS_DIRTY;
- block->nreg_dirty = 0;
- LIST_DELINIT(&block->list);
-}
-
-void r600_inval_shader_cache(struct r600_context *ctx)
+static void r600_flush_framebuffer(struct r600_context *ctx)
{
- ctx->atom_surface_sync.flush_flags |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
- ctx->atom_surface_sync.flush_flags |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
- r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-}
-
-void r600_inval_texture_cache(struct r600_context *ctx)
-{
- ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
- r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-}
+ struct si_pm4_state *pm4;
-void r600_inval_vertex_cache(struct r600_context *ctx)
-{
- /* Some GPUs don't have the vertex cache and must use the texture cache instead. */
- ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
- r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-}
-
-void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now)
-{
if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
return;
- ctx->atom_surface_sync.flush_flags |=
- r600_get_cb_flush_flags(ctx) |
- (ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0);
+ pm4 = si_pm4_alloc_state(ctx);
- if (flush_now) {
- r600_emit_atom(ctx, &ctx->atom_surface_sync.atom);
- } else {
- r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
- }
+ if (pm4 == NULL)
+ return;
+
+ si_cmd_surface_sync(pm4, S_0085F0_CB0_DEST_BASE_ENA(1) |
+ S_0085F0_CB1_DEST_BASE_ENA(1) |
+ S_0085F0_CB2_DEST_BASE_ENA(1) |
+ S_0085F0_CB3_DEST_BASE_ENA(1) |
+ S_0085F0_CB4_DEST_BASE_ENA(1) |
+ S_0085F0_CB5_DEST_BASE_ENA(1) |
+ S_0085F0_CB6_DEST_BASE_ENA(1) |
+ S_0085F0_CB7_DEST_BASE_ENA(1) |
+ S_0085F0_DB_ACTION_ENA(1) |
+ S_0085F0_DB_DEST_BASE_ENA(1));
+ si_pm4_emit(ctx, pm4);
+ si_pm4_free_state(ctx, pm4, ~0);
ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
}
-void r600_context_flush(struct r600_context *ctx, unsigned flags)
+void si_context_flush(struct r600_context *ctx, unsigned flags)
{
struct radeon_winsys_cs *cs = ctx->cs;
- struct r600_block *enable_block = NULL;
bool queries_suspended = false;
+
+#if 0
bool streamout_suspended = false;
+#endif
- if (cs->cdw == ctx->init_dwords)
+ if (!cs->cdw)
return;
/* suspend queries */
queries_suspended = true;
}
+#if 0
if (ctx->num_cs_dw_streamout_end) {
r600_context_streamout_end(ctx);
streamout_suspended = true;
}
+#endif
- r600_flush_framebuffer(ctx, true);
+ r600_flush_framebuffer(ctx);
/* partial flush is needed to avoid lockups on some chips with user fences */
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
+ /* force to keep tiling flags */
+ flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
+
+#if R600_TRACE_CS
+ if (ctx->screen->trace_bo) {
+ struct r600_screen *rscreen = ctx->screen;
+ unsigned i;
+
+ for (i = 0; i < cs->cdw; i++) {
+ fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
+ }
+ rscreen->cs_count++;
+ }
+#endif
+
/* Flush the CS. */
- ctx->ws->cs_flush(ctx->cs, flags);
+ ctx->ws->cs_flush(ctx->cs, flags, 0);
+
+#if R600_TRACE_CS
+ if (ctx->screen->trace_bo) {
+ struct r600_screen *rscreen = ctx->screen;
+ unsigned i;
+
+ for (i = 0; i < 10; i++) {
+ usleep(5);
+ if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
+ break;
+ }
+ }
+ if (i == 10) {
+ fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
+ rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
+ } else {
+ fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
+ }
+ }
+#endif
ctx->pm4_dirty_cdwords = 0;
ctx->flags = 0;
- r600_init_cs(ctx);
-
+#if 0
if (streamout_suspended) {
ctx->streamout_start = TRUE;
ctx->streamout_append_bitmask = ~0;
}
+#endif
/* resume queries */
if (queries_suspended) {
/* set all valid group as dirty so they get reemited on
* next draw command
*/
- LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) {
- if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) {
- LIST_ADDTAIL(&enable_block->list,&ctx->dirty);
- enable_block->status |= R600_BLOCK_STATUS_DIRTY;
- }
- ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords;
- enable_block->nreg_dirty = enable_block->nreg;
- }
+ si_pm4_reset_emitted(ctx);
}
-void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value)
+void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value)
{
struct radeon_winsys_cs *cs = ctx->cs;
uint64_t va;
- r600_need_cs_space(ctx, 10, FALSE);
+ si_need_cs_space(ctx, 10, FALSE);
va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
va = va + (offset << 2);
uint32_t *results;
uint64_t va;
- r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
+ si_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
uint64_t va;
if (operation == PREDICATION_OP_CLEAR) {
- r600_need_cs_space(ctx, 3, FALSE);
+ si_need_cs_space(ctx, 3, FALSE);
cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
cs->buf[cs->cdw++] = 0;
count = (query->buffer->b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.width0;
count /= query->result_size;
- r600_need_cs_space(ctx, 5 * count, TRUE);
+ si_need_cs_space(ctx, 5 * count, TRUE);
op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
* being written by the gpu, hence staging is probably a good
* usage pattern.
*/
- query->buffer = (struct r600_resource*)
- pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, buffer_size);
+ query->buffer = si_resource_create_custom(&ctx->screen->screen,
+ PIPE_USAGE_STAGING,
+ buffer_size);
if (!query->buffer) {
FREE(query);
return NULL;
void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
{
- pipe_resource_reference((struct pipe_resource**)&query->buffer, NULL);
+ si_resource_reference(&query->buffer, NULL);
free(query);
}
}
}
-void r600_context_streamout_begin(struct r600_context *ctx)
-{
- struct radeon_winsys_cs *cs = ctx->cs;
- struct r600_so_target **t = ctx->so_targets;
- unsigned *strides = ctx->vs_shader_so_strides;
- unsigned buffer_en, i;
-
- buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
- (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
- (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
- (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
-
- ctx->num_cs_dw_streamout_end =
- 12 + /* flush_vgt_streamout */
- util_bitcount(buffer_en) * 8 +
- 3;
-
- r600_need_cs_space(ctx,
- 12 + /* flush_vgt_streamout */
- 6 + /* enables */
- util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 +
- util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 +
- ctx->num_cs_dw_streamout_end, TRUE);
-
- if (ctx->chip_class >= CAYMAN) {
- evergreen_flush_vgt_streamout(ctx);
- evergreen_set_streamout_enable(ctx, buffer_en);
- }
-
- for (i = 0; i < ctx->num_so_targets; i++) {
-#if 0
- if (t[i]) {
- t[i]->stride = strides[i];
- t[i]->so_index = i;
-
- cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
- cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
- 16*i - SI_CONTEXT_REG_OFFSET) >> 2;
- cs->buf[cs->cdw++] = (t[i]->b.buffer_offset +
- t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
- cs->buf[cs->cdw++] = strides[i] >> 2; /* VTX_STRIDE (in DW) */
- cs->buf[cs->cdw++] = 0; /* BUFFER_BASE */
-
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] =
- r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer),
- RADEON_USAGE_WRITE);
-
- if (ctx->streamout_append_bitmask & (1 << i)) {
- /* Append. */
- cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
- cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
- cs->buf[cs->cdw++] = 0; /* unused */
- cs->buf[cs->cdw++] = 0; /* unused */
- cs->buf[cs->cdw++] = 0; /* src address lo */
- cs->buf[cs->cdw++] = 0; /* src address hi */
-
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] =
- r600_context_bo_reloc(ctx, t[i]->filled_size,
- RADEON_USAGE_READ);
- } else {
- /* Start from the beginning. */
- cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
- cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
- cs->buf[cs->cdw++] = 0; /* unused */
- cs->buf[cs->cdw++] = 0; /* unused */
- cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */
- cs->buf[cs->cdw++] = 0; /* unused */
- }
- }
-#endif
- }
-}
-
-void r600_context_streamout_end(struct r600_context *ctx)
-{
- struct radeon_winsys_cs *cs = ctx->cs;
- struct r600_so_target **t = ctx->so_targets;
- unsigned i, flush_flags = 0;
-
- evergreen_flush_vgt_streamout(ctx);
-
- for (i = 0; i < ctx->num_so_targets; i++) {
-#if 0
- if (t[i]) {
- cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
- cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
- STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
- cs->buf[cs->cdw++] = 0; /* dst address lo */
- cs->buf[cs->cdw++] = 0; /* dst address hi */
- cs->buf[cs->cdw++] = 0; /* unused */
- cs->buf[cs->cdw++] = 0; /* unused */
-
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] =
- r600_context_bo_reloc(ctx, t[i]->filled_size,
- RADEON_USAGE_WRITE);
-
- flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
- }
-#endif
- }
-
- evergreen_set_streamout_enable(ctx, 0);
-
- ctx->atom_surface_sync.flush_flags |= flush_flags;
- r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-
- ctx->num_cs_dw_streamout_end = 0;
-
- /* XXX print some debug info */
- for (i = 0; i < ctx->num_so_targets; i++) {
- if (!t[i])
- continue;
-
- uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, ctx->cs, RADEON_USAGE_READ);
- printf("FILLED_SIZE%i: %u\n", i, *ptr);
- ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf);
- }
-}
-
void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t)
{
struct radeon_winsys_cs *cs = ctx->cs;
- r600_need_cs_space(ctx, 14 + 21, TRUE);
+ si_need_cs_space(ctx, 14 + 21, TRUE);
cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
cs->buf[cs->cdw++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - SI_CONTEXT_REG_OFFSET) >> 2;
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, RADEON_USAGE_READ);
-#if 0 /* I have not found this useful yet. */
- cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0);
- cs->buf[cs->cdw++] = COPY_DW_SRC_IS_REG | COPY_DW_DST_IS_REG;
- cs->buf[cs->cdw++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* src register */
- cs->buf[cs->cdw++] = 0; /* unused */
- cs->buf[cs->cdw++] = R_0085F4_CP_COHER_SIZE >> 2; /* dst register */
- cs->buf[cs->cdw++] = 0; /* unused */
-
- cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
- cs->buf[cs->cdw++] = (R_0085F0_CP_COHER_CNTL - SI_CONFIG_REG_OFFSET) >> 2;
- cs->buf[cs->cdw++] = S_0085F0_SO0_DEST_BASE_ENA(1) << t->so_index;
-
- cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
- cs->buf[cs->cdw++] = (R_0085F8_CP_COHER_BASE - SI_CONFIG_REG_OFFSET) >> 2;
- cs->buf[cs->cdw++] = t->b.buffer_offset >> 2;
+}
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, (struct r600_resource*)t->b.buffer,
- RADEON_USAGE_WRITE);
+#if R600_TRACE_CS
+void r600_trace_emit(struct r600_context *rctx)
+{
+ struct r600_screen *rscreen = rctx->screen;
+ struct radeon_winsys_cs *cs = rctx->cs;
+ uint64_t va;
- cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
- cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
- cs->buf[cs->cdw++] = R_0085FC_CP_COHER_STATUS >> 2; /* register */
- cs->buf[cs->cdw++] = 0;
- cs->buf[cs->cdw++] = 0; /* reference value */
- cs->buf[cs->cdw++] = 0xffffffff; /* mask */
- cs->buf[cs->cdw++] = 4; /* poll interval */
-#endif
+ va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
+ r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
+ cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
+ cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
+ PKT3_WRITE_DATA_WR_CONFIRM |
+ PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
+ cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
+ cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
+ cs->buf[cs->cdw++] = cs->cdw;
+ cs->buf[cs->cdw++] = rscreen->cs_count;
}
+#endif