#define WAIT_REG_MEM_EQUAL 3
#define WAIT_REG_MEM_NOT_EQUAL 4
#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x) & 0x3) << 4)
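+/* Bit 8 selects the engine that executes the wait: 0 = ME, 1 = PFP. */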
+#define WAIT_REG_MEM_PFP (1 << 8)
#define PKT3_MEM_WRITE 0x3D /* not on CIK */
#define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */
#define R_3F0_IB_BASE_LO 0x3F0
* \param new_fence Fence value to write for this event.
*/
-void si_gfx_write_event_eop(struct si_context *ctx,
- unsigned event, unsigned event_flags,
- unsigned dst_sel, unsigned int_sel, unsigned data_sel,
- struct r600_resource *buf, uint64_t va,
- uint32_t new_fence, unsigned query_type)
+void si_cp_release_mem(struct si_context *ctx,
+ unsigned event, unsigned event_flags,
+ unsigned dst_sel, unsigned int_sel, unsigned data_sel,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t new_fence, unsigned query_type)
{
struct radeon_cmdbuf *cs = ctx->gfx_cs;
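+ /* Emit an end-of-pipe event (EVENT_WRITE_EOP, or RELEASE_MEM on GFX9).
+  * Once all prior work has drained, the CP writes a fence value or
+  * timestamp to "va" and/or raises an interrupt, per data_sel/int_sel. */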
unsigned op = EVENT_TYPE(event) |
}
}
-unsigned si_gfx_write_fence_dwords(struct si_screen *screen)
+unsigned si_cp_write_fence_dwords(struct si_screen *screen)
{
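+ /* Budget for one end-of-pipe fence packet; callers add this to their
+  * own dword counts when reserving command-stream space. */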
unsigned dwords = 6;
return dwords;
}
-void si_gfx_wait_fence(struct si_context *ctx,
- uint64_t va, uint32_t ref, uint32_t mask)
+void si_cp_wait_mem(struct si_context *ctx,
+ uint64_t va, uint32_t ref, uint32_t mask, unsigned flags)
{
struct radeon_cmdbuf *cs = ctx->gfx_cs;
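+ /* WAIT_REG_MEM makes the CP busy-wait until (*va & mask) == ref;
+  * MEM_SPACE(1) polls a memory address rather than a register. */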
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
+ radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1) | flags);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, ref); /* reference value */
radeon_emit(cs, fence_va >> 32);
radeon_emit(cs, 0x80000000);
} else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
- si_gfx_write_event_eop(ctx,
- V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
- EOP_DATA_SEL_VALUE_32BIT,
- NULL, fence_va, 0x80000000,
- PIPE_QUERY_GPU_FINISHED);
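+ /* Write the "fence signaled" bit (0x80000000) to fence_va once the
+  * bottom of the pipe is reached. */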
+ si_cp_release_mem(ctx,
+ V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DST_SEL_MEM,
+ EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+ EOP_DATA_SEL_VALUE_32BIT,
+ NULL, fence_va, 0x80000000,
+ PIPE_QUERY_GPU_FINISHED);
} else {
assert(false);
}
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
- si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
- EOP_DATA_SEL_VALUE_32BIT,
- buffer, va, 0, SI_NOT_QUERY);
- si_gfx_wait_fence(sctx, va, 0, 0xffffffff);
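+ /* Idle the GPU before sampling: write 0 to "va" at bottom-of-pipe,
+  * then make the CP poll until the value lands. */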
+ si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DST_SEL_MEM,
+ EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+ EOP_DATA_SEL_VALUE_32BIT,
+ buffer, va, 0, SI_NOT_QUERY);
+ si_cp_wait_mem(sctx, va, 0, 0xffffffff, 0);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
if (!pc)
return;
- pc->num_stop_cs_dwords = 14 + si_gfx_write_fence_dwords(screen);
+ pc->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(screen);
pc->num_instance_cs_dwords = 3;
pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits);
uint64_t offset, uint64_t size, unsigned value);
/* si_fence.c */
-void si_gfx_write_event_eop(struct si_context *ctx,
- unsigned event, unsigned event_flags,
- unsigned dst_sel, unsigned int_sel, unsigned data_sel,
- struct r600_resource *buf, uint64_t va,
- uint32_t new_fence, unsigned query_type);
-unsigned si_gfx_write_fence_dwords(struct si_screen *screen);
-void si_gfx_wait_fence(struct si_context *ctx,
- uint64_t va, uint32_t ref, uint32_t mask);
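+/* si_cp_release_mem writes a fence value or timestamp to memory at
+ * end-of-pipe; si_cp_wait_mem stalls the CP until a memory location
+ * matches "ref" under "mask". The two are typically paired to wait
+ * for earlier GPU work. */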
+void si_cp_release_mem(struct si_context *ctx,
+ unsigned event, unsigned event_flags,
+ unsigned dst_sel, unsigned int_sel, unsigned data_sel,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t new_fence, unsigned query_type);
+unsigned si_cp_write_fence_dwords(struct si_screen *screen);
+void si_cp_wait_mem(struct si_context *ctx,
+ uint64_t va, uint32_t ref, uint32_t mask, unsigned flags);
void si_init_fence_functions(struct si_context *ctx);
void si_init_screen_fence_functions(struct si_screen *screen);
struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
query->result_size = 16 * sscreen->info.num_render_backends;
query->result_size += 16; /* for the fence + alignment */
- query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(sscreen);
+ query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen);
break;
case SI_QUERY_TIME_ELAPSED_SDMA:
/* GET_GLOBAL_TIMESTAMP only works if the offset is a multiple of 32. */
break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 24;
- query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(sscreen);
+ query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen);
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 16;
- query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(sscreen);
+ query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen);
query->flags = SI_QUERY_HW_FLAG_NO_START;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
/* 11 values on GCN. */
query->result_size = 11 * 16;
query->result_size += 8; /* for the fence + alignment */
- query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(sscreen);
+ query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen);
break;
default:
assert(0);
va += 8;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
- si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS,
- 0, EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
- EOP_DATA_SEL_TIMESTAMP, NULL, va,
- 0, query->b.type);
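+ /* EOP_DATA_SEL_TIMESTAMP writes the 64-bit GPU clock value at "va"
+  * instead of a fence value. */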
+ si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS,
+ 0, EOP_DST_SEL_MEM,
+ EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+ EOP_DATA_SEL_TIMESTAMP, NULL, va,
+ 0, query->b.type);
fence_va = va + 8;
break;
case PIPE_QUERY_PIPELINE_STATISTICS: {
RADEON_PRIO_QUERY);
if (fence_va) {
- si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
- EOP_DATA_SEL_VALUE_32BIT,
- query->buffer.buf, fence_va, 0x80000000,
- query->b.type);
+ si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DST_SEL_MEM,
+ EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+ EOP_DATA_SEL_VALUE_32BIT,
+ query->buffer.buf, fence_va, 0x80000000,
+ query->b.type);
}
}
va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
va += params.fence_offset;
- si_gfx_wait_fence(sctx, va, 0x80000000, 0x80000000);
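+ /* Block the CP until the query fence (bit 31 at "va") is signaled,
+  * so the results are complete before the compute shader below
+  * reads them. */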
+ si_cp_wait_mem(sctx, va, 0x80000000, 0x80000000, 0);
}
sctx->b.launch_grid(&sctx->b, &grid);
/* Flushing CB data caches is necessary for DCC. */
if (sctx->chip_class == VI)
- si_gfx_write_event_eop(sctx,
- V_028A90_FLUSH_AND_INV_CB_DATA_TS,
- 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
- EOP_DATA_SEL_DISCARD, NULL,
- 0, 0, SI_NOT_QUERY);
+ si_cp_release_mem(sctx,
+ V_028A90_FLUSH_AND_INV_CB_DATA_TS,
+ 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
+ EOP_DATA_SEL_DISCARD, NULL,
+ 0, 0, SI_NOT_QUERY);
}
if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
va = sctx->wait_mem_scratch->gpu_address;
sctx->wait_mem_number++;
- si_gfx_write_event_eop(sctx, cb_db_event, tc_flags,
- EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
- EOP_DATA_SEL_VALUE_32BIT,
- sctx->wait_mem_scratch, va,
- sctx->wait_mem_number, SI_NOT_QUERY);
- si_gfx_wait_fence(sctx, va, sctx->wait_mem_number, 0xffffffff);
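+ /* Bump the fence value, have the EOP event write it to the scratch
+  * buffer after the CB/DB flush completes, then stall the CP until
+  * the new value appears. */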
+ si_cp_release_mem(sctx, cb_db_event, tc_flags,
+ EOP_DST_SEL_MEM,
+ EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+ EOP_DATA_SEL_VALUE_32BIT,
+ sctx->wait_mem_scratch, va,
+ sctx->wait_mem_number, SI_NOT_QUERY);
+ si_cp_wait_mem(sctx, va, sctx->wait_mem_number, 0xffffffff, 0);
}
/* Make sure ME is idle (it executes most packets) before continuing.