* \param old_value Previous fence value (for a bug workaround)
* \param new_value Fence value to write for this event.
*/
-void si_cp_release_mem(struct si_context *ctx,
+void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs,
unsigned event, unsigned event_flags,
unsigned dst_sel, unsigned int_sel, unsigned data_sel,
struct si_resource *buf, uint64_t va,
uint32_t new_fence, unsigned query_type)
{
- struct radeon_cmdbuf *cs = ctx->gfx_cs;
unsigned op = EVENT_TYPE(event) |
EVENT_INDEX(event == V_028A90_CS_DONE ||
event == V_028A90_PS_DONE ? 6 : 5) |
unsigned sel = EOP_DST_SEL(dst_sel) |
EOP_INT_SEL(int_sel) |
EOP_DATA_SEL(data_sel);
+ bool compute_ib = !ctx->has_graphics ||
+ cs == ctx->prim_discard_compute_cs;
- if (ctx->chip_class >= GFX9) {
+ if (ctx->chip_class >= GFX9 ||
+ (compute_ib && ctx->chip_class >= GFX7)) {
/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
* counters) must immediately precede every timestamp event to
* prevent a GPU hang on GFX9.
* Occlusion queries don't need to do it here, because they
* always do ZPASS_DONE before the timestamp.
*/
- if (ctx->chip_class == GFX9 &&
+ if (ctx->chip_class == GFX9 && !compute_ib &&
query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
}
- radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
+ radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, ctx->chip_class >= GFX9 ? 6 : 5, 0));
radeon_emit(cs, op);
radeon_emit(cs, sel);
radeon_emit(cs, va); /* address lo */
radeon_emit(cs, va >> 32); /* address hi */
radeon_emit(cs, new_fence); /* immediate data lo */
radeon_emit(cs, 0); /* immediate data hi */
- radeon_emit(cs, 0); /* unused */
+ if (ctx->chip_class >= GFX9)
+ radeon_emit(cs, 0); /* unused */
} else {
- if (ctx->chip_class == CIK ||
- ctx->chip_class == VI) {
+ if (ctx->chip_class == GFX7 ||
+ ctx->chip_class == GFX8) {
struct si_resource *scratch = ctx->eop_bug_scratch;
uint64_t va = scratch->gpu_address;
{
unsigned dwords = 6;
- if (screen->info.chip_class == CIK ||
- screen->info.chip_class == VI)
+ if (screen->info.chip_class == GFX7 ||
+ screen->info.chip_class == GFX8)
dwords *= 2;
return dwords;
struct radeon_winsys *ws = sctx->ws;
if (sctx->dma_cs)
- ws->cs_add_fence_dependency(sctx->dma_cs, fence);
- ws->cs_add_fence_dependency(sctx->gfx_cs, fence);
+ ws->cs_add_fence_dependency(sctx->dma_cs, fence, 0);
+ ws->cs_add_fence_dependency(sctx->gfx_cs, fence, 0);
}
static void si_add_syncobj_signal(struct si_context *sctx,
assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1);
- /* Use uncached system memory for the fence. */
+ /* Use cached system memory for the fence. */
u_upload_alloc(ctx->cached_gtt_allocator, 0, 4, 4,
&fine->offset, (struct pipe_resource **)&fine->buf, (void **)&fence_ptr);
if (!fine->buf)
radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf,
RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
- si_cp_release_mem(ctx,
+ si_cp_release_mem(ctx, ctx->gfx_cs,
V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
EOP_DATA_SEL_VALUE_32BIT,
}
}
-static boolean si_fence_finish(struct pipe_screen *screen,
- struct pipe_context *ctx,
- struct pipe_fence_handle *fence,
- uint64_t timeout)
+static bool si_fence_finish(struct pipe_screen *screen,
+ struct pipe_context *ctx,
+ struct pipe_fence_handle *fence,
+ uint64_t timeout)
{
struct radeon_winsys *rws = ((struct si_screen*)screen)->ws;
struct si_multi_fence *sfence = (struct si_multi_fence *)fence;
}
assert(!fine.buf);
finish:
- if (!(flags & PIPE_FLUSH_DEFERRED)) {
+ if (!(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC))) {
if (sctx->dma_cs)
ws->cs_sync_flush(sctx->dma_cs);
ws->cs_sync_flush(sctx->gfx_cs);