* Authors:
* Jerome Glisse
*/
-#include "r600_hw_context_priv.h"
+#include "../radeon/r600_cs.h"
#include "radeonsi_pm4.h"
#include "radeonsi_pipe.h"
#include "sid.h"
/* Get the backend mask */
void si_get_backend_mask(struct r600_context *ctx)
{
- struct radeon_winsys_cs *cs = ctx->cs;
- struct si_resource *buffer;
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+ struct r600_resource *buffer;
uint32_t *results;
- unsigned num_backends = ctx->screen->info.r600_num_backends;
+ unsigned num_backends = ctx->screen->b.info.r600_num_backends;
unsigned i, mask = 0;
/* if backend_map query is supported by the kernel */
- if (ctx->screen->info.r600_backend_map_valid) {
- unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
- unsigned backend_map = ctx->screen->info.r600_backend_map;
+ if (ctx->screen->b.info.r600_backend_map_valid) {
+ unsigned num_tile_pipes = ctx->screen->b.info.r600_num_tile_pipes;
+ unsigned backend_map = ctx->screen->b.info.r600_backend_map;
unsigned item_width = 4, item_mask = 0x7;
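/* The kernel packs one backend index per tile pipe into backend_map,
 * item_width bits each; item_mask presumably keeps the low 3 bits,
 * enough for up to 8 backends. */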
while (num_tile_pipes--) {
/* otherwise, use the fallback path for older kernels */
/* create buffer for event data */
- buffer = si_resource_create_custom(&ctx->screen->screen,
+ buffer = r600_resource_create_custom(&ctx->screen->b.b,
PIPE_USAGE_STAGING,
ctx->max_db*16);
if (!buffer)
goto err;
/* initialize buffer with zeroes */
- results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
+ results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
if (results) {
uint64_t va = 0;
memset(results, 0, ctx->max_db * 4 * 4);
- ctx->ws->buffer_unmap(buffer->cs_buf);
+ ctx->b.ws->buffer_unmap(buffer->cs_buf);
/* emit EVENT_WRITE for ZPASS_DONE */
- va = r600_resource_va(&ctx->screen->screen, (void *)buffer);
+ va = r600_resource_va(&ctx->screen->b.b, (void *)buffer);
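/* ZPASS_DONE (EVENT_INDEX(1)) takes a 64-bit destination address; each DB
 * then appears to write a 16-byte pair of Z-pass counters there, which is
 * why the buffer above is sized ctx->max_db*16. */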
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
cs->buf[cs->cdw++] = va;
cs->buf[cs->cdw++] = va >> 32;
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, buffer, RADEON_USAGE_WRITE);
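+ /* The NOP + reloc pair above tells the kernel CS checker which BO the
+  * preceding EVENT_WRITE addresses. */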
/* analyze results */
- results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_READ);
+ results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_READ);
if (results) {
for (i = 0; i < ctx->max_db; i++) {
/* at least the highest bit will be set if the backend is used */
if (results[i*4 + 1])
mask |= (1<<i);
}
- ctx->ws->buffer_unmap(buffer->cs_buf);
+ ctx->b.ws->buffer_unmap(buffer->cs_buf);
}
}
- si_resource_reference(&buffer, NULL);
+ r600_resource_reference(&buffer, NULL);
if (mask != 0) {
ctx->backend_mask = mask;
int i;
/* The number of dwords we have used in the CS so far. */
- num_dw += ctx->cs->cdw;
+ num_dw += ctx->b.rings.gfx.cs->cdw;
for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
if (ctx->atoms.array[i]->dirty) {
num_dw += ctx->num_cs_dw_nontimer_queries_suspend;
/* Count in streamout_end at the end of CS. */
- num_dw += ctx->num_cs_dw_streamout_end;
+ if (ctx->b.streamout.begin_emitted) {
+ num_dw += ctx->b.streamout.num_dw_for_end;
+ }
/* Count in render_condition(NULL) at the end of CS. */
if (ctx->predicate_drawing) {
}
/* Count in framebuffer cache flushes at the end of CS. */
- num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */
-
- /* Save 16 dwords for the fence mechanism. */
- num_dw += 16;
+ num_dw += ctx->atoms.cache_flush->num_dw;
#if R600_TRACE_CS
if (ctx->screen->trace_bo) {
/* Flush if there's not enough space. */
if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
- radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
+ radeonsi_flush(&ctx->b.b, NULL, RADEON_FLUSH_ASYNC);
}
}
-static void r600_flush_framebuffer(struct r600_context *ctx)
-{
- struct si_pm4_state *pm4;
-
- if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
- return;
-
- pm4 = si_pm4_alloc_state(ctx);
-
- if (pm4 == NULL)
- return;
-
- si_cmd_surface_sync(pm4, S_0085F0_CB0_DEST_BASE_ENA(1) |
- S_0085F0_CB1_DEST_BASE_ENA(1) |
- S_0085F0_CB2_DEST_BASE_ENA(1) |
- S_0085F0_CB3_DEST_BASE_ENA(1) |
- S_0085F0_CB4_DEST_BASE_ENA(1) |
- S_0085F0_CB5_DEST_BASE_ENA(1) |
- S_0085F0_CB6_DEST_BASE_ENA(1) |
- S_0085F0_CB7_DEST_BASE_ENA(1) |
- S_0085F0_DB_ACTION_ENA(1) |
- S_0085F0_DB_DEST_BASE_ENA(1));
- si_cmd_flush_and_inv_cb_meta(pm4);
-
- si_pm4_emit(ctx, pm4);
- si_pm4_free_state(ctx, pm4, ~0);
-
- ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
- ctx->flush_and_inv_cb_meta = false;
-}
-
void si_context_flush(struct r600_context *ctx, unsigned flags)
{
- struct radeon_winsys_cs *cs = ctx->cs;
- bool queries_suspended = false;
-
-#if 0
- bool streamout_suspended = false;
-#endif
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
if (!cs->cdw)
return;
/* suspend queries */
+ ctx->nontimer_queries_suspended = false;
if (ctx->num_cs_dw_nontimer_queries_suspend) {
r600_context_queries_suspend(ctx);
- queries_suspended = true;
+ ctx->nontimer_queries_suspended = true;
}
-#if 0
- if (ctx->num_cs_dw_streamout_end) {
- r600_context_streamout_end(ctx);
- streamout_suspended = true;
+ ctx->b.streamout.suspended = false;
+
+ if (ctx->b.streamout.begin_emitted) {
+ r600_emit_streamout_end(&ctx->b);
+ ctx->b.streamout.suspended = true;
}
-#endif
- r600_flush_framebuffer(ctx);
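+ /* Conservatively flush and invalidate all CB/DB caches and the texture
+  * cache before submission, so memory is coherent for whatever consumes
+  * the buffers next. */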
+ ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
+ R600_CONTEXT_FLUSH_AND_INV_CB_META |
+ R600_CONTEXT_FLUSH_AND_INV_DB |
+ R600_CONTEXT_FLUSH_AND_INV_DB_META |
+ R600_CONTEXT_INV_TEX_CACHE;
+ si_emit_cache_flush(&ctx->b, NULL);
- /* partial flush is needed to avoid lockups on some chips with user fences */
+ /* this is probably not needed anymore */
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
#endif
/* Flush the CS. */
- ctx->ws->cs_flush(ctx->cs, flags, 0);
+ ctx->b.ws->cs_flush(ctx->b.rings.gfx.cs, flags, 0);
#if R600_TRACE_CS
if (ctx->screen->trace_bo) {
}
#endif
+ si_begin_new_cs(ctx);
+}
+
+void si_begin_new_cs(struct r600_context *ctx)
+{
ctx->pm4_dirty_cdwords = 0;
- ctx->flags = 0;
+
+ /* Flush read caches at the beginning of CS. */
+ ctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+ R600_CONTEXT_INV_CONST_CACHE |
+ R600_CONTEXT_INV_SHADER_CACHE;
/* set all valid groups as dirty so they get re-emitted on
* next draw command
si_pm4_emit(ctx, ctx->queued.named.init);
ctx->emitted.named.init = ctx->queued.named.init;
-#if 0
- if (streamout_suspended) {
- ctx->streamout_start = TRUE;
- ctx->streamout_append_bitmask = ~0;
+ if (ctx->b.streamout.suspended) {
+ ctx->b.streamout.append_bitmask = ctx->b.streamout.enabled_mask;
+ r600_streamout_buffers_dirty(&ctx->b);
}
-#endif
/* resume queries */
- if (queries_suspended) {
+ if (ctx->nontimer_queries_suspended) {
r600_context_queries_resume(ctx);
}
si_all_descriptors_begin_new_cs(ctx);
}
-void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value)
-{
- struct radeon_winsys_cs *cs = ctx->cs;
- uint64_t va;
-
- si_need_cs_space(ctx, 10, FALSE);
-
- va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
- va = va + (offset << 2);
-
- cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
- cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
- cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
- cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
- cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */
- /* DATA_SEL | INT_EN | ADDRESS_HI */
- cs->buf[cs->cdw++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF);
- cs->buf[cs->cdw++] = value; /* DATA_LO */
- cs->buf[cs->cdw++] = 0; /* DATA_HI */
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE);
-}
-
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
bool test_status_bit)
{
unsigned results_base = query->results_start;
char *map;
- map = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs,
+ map = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs,
PIPE_TRANSFER_READ |
(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
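/* With DONTBLOCK the map fails instead of stalling if the GPU is still
 * writing the results. */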
if (!map)
}
query->results_start = query->results_end;
- ctx->ws->buffer_unmap(query->buffer->cs_buf);
+ ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
return TRUE;
}
void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
{
- struct radeon_winsys_cs *cs = ctx->cs;
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
unsigned new_results_end, i;
uint32_t *results;
uint64_t va;
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
- results = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
+ results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
if (results) {
results = (uint32_t*)((char*)results + query->results_end);
memset(results, 0, query->result_size);
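/* Presetting bit 31 below marks the slots of disabled backends as already
 * written, so result collection does not wait on DBs that never report. */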
results[(i * 4)+3] = 0x80000000;
}
}
- ctx->ws->buffer_unmap(query->buffer->cs_buf);
+ ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
}
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- results = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
+ results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
results = (uint32_t*)((char*)results + query->results_end);
memset(results, 0, query->result_size);
- ctx->ws->buffer_unmap(query->buffer->cs_buf);
+ ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
break;
default:
assert(0);
}
/* emit begin query */
- va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+ va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
va += query->results_end;
switch (query->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
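/* SAMPLE_STREAMOUTSTATS expects the full 64-bit GPU virtual address of the
 * destination (low dword, then high bits), not a buffer-relative offset. */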
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
- cs->buf[cs->cdw++] = query->results_end;
- cs->buf[cs->cdw++] = 0;
+ cs->buf[cs->cdw++] = va;
+ cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
break;
case PIPE_QUERY_TIME_ELAPSED:
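/* EVENT_WRITE_EOP presumably writes a 64-bit timestamp once all prior work
 * has drained past end-of-pipe. */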
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
assert(0);
}
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE);
if (!si_is_timer_query(query->type)) {
ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
void r600_query_end(struct r600_context *ctx, struct r600_query *query)
{
- struct radeon_winsys_cs *cs = ctx->cs;
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
uint64_t va;
unsigned new_results_end;
}
}
- va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+ va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
/* emit end query */
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
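+ /* End-of-query values go into the second half of the result slot, paired
+  * with the begin values written by r600_query_begin. */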
+ va += query->results_end + query->result_size/2;
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
- cs->buf[cs->cdw++] = query->results_end + query->result_size/2;
- cs->buf[cs->cdw++] = 0;
+ cs->buf[cs->cdw++] = va;
+ cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
break;
case PIPE_QUERY_TIME_ELAPSED:
va += query->results_end + query->result_size/2;
assert(0);
}
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE);
query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
int flag_wait)
{
- struct radeon_winsys_cs *cs = ctx->cs;
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
uint64_t va;
if (operation == PREDICATION_OP_CLEAR) {
op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
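/* WAIT stalls rendering until the query result is ready; NOWAIT_DRAW draws
 * as visible when the result is not ready yet. */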
- va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+ va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
/* emit predicate packets for all data blocks */
while (results_base != query->results_end) {
cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer,
- RADEON_USAGE_READ);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx,
+ query->buffer, RADEON_USAGE_READ);
results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
/* set CONTINUE bit for all packets except the first */
* being written by the GPU, hence staging is probably a good
* usage pattern.
*/
- query->buffer = si_resource_create_custom(&ctx->screen->screen,
+ query->buffer = r600_resource_create_custom(&ctx->screen->b.b,
PIPE_USAGE_STAGING,
buffer_size);
if (!query->buffer) {
void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
{
- si_resource_reference(&query->buffer, NULL);
+ r600_resource_reference(&query->buffer, NULL);
free(query);
}
break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
- *result_u64 = (1000000 * query->result.u64) / ctx->screen->info.r600_clock_crystal_freq;
+ *result_u64 = (1000000 * query->result.u64) / ctx->screen->b.info.r600_clock_crystal_freq;
break;
case PIPE_QUERY_SO_STATISTICS:
*result_so = query->result.so;
}
}
-void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t)
-{
- struct radeon_winsys_cs *cs = ctx->cs;
- si_need_cs_space(ctx, 14 + 21, TRUE);
-
- cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
- cs->buf[cs->cdw++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - SI_CONTEXT_REG_OFFSET) >> 2;
- cs->buf[cs->cdw++] = 0;
-
- cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
- cs->buf[cs->cdw++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - SI_CONTEXT_REG_OFFSET) >> 2;
- cs->buf[cs->cdw++] = t->stride >> 2;
-
-#if 0
- cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0);
- cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
- cs->buf[cs->cdw++] = 0; /* src address lo */
- cs->buf[cs->cdw++] = 0; /* src address hi */
- cs->buf[cs->cdw++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* dst register */
- cs->buf[cs->cdw++] = 0; /* unused */
-#endif
-
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, RADEON_USAGE_READ);
-
-}
-
#if R600_TRACE_CS
void r600_trace_emit(struct r600_context *rctx)
{