From ea5a74fb5892c9b6ca62054be2ee83a743103f4c Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 5 Oct 2010 16:14:11 -0400 Subject: [PATCH] r600g: userspace fence to avoid kernel call for testing bo busy status Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600.h | 4 + .../winsys/r600/drm/evergreen_hw_context.c | 7 ++ src/gallium/winsys/r600/drm/r600_hw_context.c | 73 ++++++++++++++++++- src/gallium/winsys/r600/drm/r600_priv.h | 5 +- src/gallium/winsys/r600/drm/r600d.h | 1 + src/gallium/winsys/r600/drm/radeon_bo.c | 60 ++++----------- 6 files changed, 103 insertions(+), 47 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 630177d6add..24e25cec0db 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -233,6 +233,10 @@ struct r600_context { u32 *pm4; struct list_head query_list; unsigned num_query_running; + unsigned fence; + struct list_head fenced_bo; + unsigned *cfence; + struct r600_bo *fence_bo; }; struct r600_draw { diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 1355b079450..2093a2d09c1 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -613,6 +613,13 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) r = -ENOMEM; goto out_err; } + /* save 16 dwords of space for the fence mechanism */ + ctx->pm4_ndwords -= 16; + + r = r600_context_init_fence(ctx); + if (r) { + goto out_err; + } /* init dirty list */ LIST_INITHEAD(&ctx->dirty); diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index b379499f060..7d81d734571 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -41,6 +41,44 @@ #define GROUP_FORCE_NEW_BLOCK 0 +int r600_context_init_fence(struct r600_context *ctx) +{ + ctx->fence = 1; + 
ctx->fence_bo = r600_bo(ctx->radeon, 4096, 0, 0); + if (ctx->fence_bo == NULL) { + return -ENOMEM; + } + ctx->cfence = r600_bo_map(ctx->radeon, ctx->fence_bo, PB_USAGE_UNSYNCHRONIZED, NULL); + *ctx->cfence = 0; /* NOTE(review): r600_bo_map() result is dereferenced without a NULL check - confirm it cannot fail */ + LIST_INITHEAD(&ctx->fenced_bo); + return 0; +} + +static void INLINE r600_context_update_fenced_list(struct r600_context *ctx) +{ + for (int i = 0; i < ctx->creloc; i++) { + if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist)) + LIST_DELINIT(&ctx->bo[i]->fencedlist); + LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo); + ctx->bo[i]->fence = ctx->fence; + ctx->bo[i]->ctx = ctx; + } +} + +static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsigned fence) +{ + struct radeon_bo *bo, *tmp; + + LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { + if (bo->fence <= *ctx->cfence) { + LIST_DELINIT(&bo->fencedlist); + bo->fence = 0; + } else { + bo->fence = fence; + } + } +} + int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg) { struct r600_block *block; @@ -572,6 +610,9 @@ void r600_context_fini(struct r600_context *ctx) } free(ctx->reloc); free(ctx->pm4); + if (ctx->fence_bo) { + r600_bo_reference(ctx->radeon, &ctx->fence_bo, NULL); + } memset(ctx, 0, sizeof(struct r600_context)); } @@ -691,6 +732,13 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) r = -ENOMEM; goto out_err; } + /* save 16 dwords of space for the fence mechanism */ + ctx->pm4_ndwords -= 16; + + r = r600_context_init_fence(ctx); + if (r) { + goto out_err; + } /* init dirty list */ LIST_INITHEAD(&ctx->dirty); @@ -1019,6 +1067,7 @@ void r600_context_flush(struct r600_context *ctx) struct drm_radeon_cs drmib; struct drm_radeon_cs_chunk chunks[2]; uint64_t chunk_array[2]; + unsigned fence; int r; if (!ctx->pm4_cdwords) @@ -1028,6 +1077,18 @@ void r600_context_flush(struct r600_context *ctx) r600_context_queries_suspend(ctx); radeon_bo_pbmgr_flush_maps(ctx->radeon->kman); + + /* emit fence */ + 
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); /* 8 dwords are written here, out of the 16 reserved at context init */ + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT | (5 << 8); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); + ctx->pm4[ctx->pm4_cdwords++] = ctx->fence; + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->fence_bo); + #if 1 /* emit cs */ drmib.num_chunks = 2; @@ -1043,8 +1104,18 @@ void r600_context_flush(struct r600_context *ctx) r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, sizeof(struct drm_radeon_cs)); #endif + + r600_context_update_fenced_list(ctx); + + fence = ctx->fence + 1; + if (fence < ctx->fence) { + /* wrap around: 0 means not fenced, so restart at 1 */ + fence = 1; + r600_context_fence_wraparound(ctx, fence); + } + ctx->fence = fence; + /* restart */ - radeon_bo_fencelist(ctx->radeon, ctx->bo, ctx->creloc); for (int i = 0; i < ctx->creloc; i++) { ctx->bo[i]->reloc = NULL; ctx->bo[i]->last_flush = 0; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index ea2cf347785..a693a5b5ab8 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -64,9 +64,9 @@ struct radeon_bo { unsigned map_count; void *data; struct list_head fencedlist; + unsigned fence; + struct r600_context *ctx; boolean shared; - int64_t last_busy; - boolean set_busy; struct r600_reloc *reloc; unsigned reloc_id; unsigned last_flush; @@ -103,6 +103,7 @@ struct pb_buffer *radeon_bo_pb_create_buffer_from_handle(struct pb_manager *_mgr uint32_t handle); /* r600_hw_context.c */ +int r600_context_init_fence(struct r600_context *ctx); void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo); void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, unsigned flush_mask, struct r600_bo *rbo); diff --git 
a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h index ccc9ffaf8e3..d91f7737af3 100644 --- a/src/gallium/winsys/r600/drm/r600d.h +++ b/src/gallium/winsys/r600/drm/r600d.h @@ -91,6 +91,7 @@ #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 #define EVENT_TYPE_ZPASS_DONE 0x15 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 42a23f0ab8f..836d5d77e09 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -128,7 +128,6 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, return bo; } - static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo) { struct drm_gem_close args; @@ -158,8 +157,14 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) struct drm_radeon_gem_wait_idle args; int ret; - if (LIST_IS_EMPTY(&bo->fencedlist) && !bo->shared) + if (!bo->fence && !bo->shared) + return 0; + /* shared bo: bo->ctx may be unset, let the kernel decide as radeon_bo_busy() does */ + if (!bo->shared && bo->fence <= *bo->ctx->cfence) { + LIST_DELINIT(&bo->fencedlist); + bo->fence = 0; return 0; + } /* Zero out args to make valgrind happy */ memset(&args, 0, sizeof(args)); @@ -171,22 +176,20 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) return ret; } -#define BO_BUSY_BACKOFF 10000 - int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain) { struct drm_radeon_gem_busy args; int ret; - int64_t now; - - now = os_time_get(); - if (LIST_IS_EMPTY(&bo->fencedlist) && !bo->shared) - return 0; - - if (bo->set_busy && (now - bo->last_busy < BO_BUSY_BACKOFF)) - return -EBUSY; - bo->set_busy = FALSE; + if (!bo->shared) { + if (!bo->fence) + return 0; + if (bo->fence <= *bo->ctx->cfence) { + LIST_DELINIT(&bo->fencedlist); + bo->fence = 0; + return 0; + } + } memset(&args, 0, sizeof(args)); args.handle = bo->handle; @@ -195,37 +198,6 @@ int 
radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args)); - if (ret == 0) { - struct radeon_bo *entry, *tent; - LIST_FOR_EACH_ENTRY_SAFE(entry, tent, &bo->fencedlist, fencedlist) { - LIST_DELINIT(&entry->fencedlist); - } - } else { - bo->set_busy = TRUE; - bo->last_busy = now; - } *domain = args.domain; return ret; } - -int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, - uint32_t num_bo) -{ - struct radeon_bo *first; - int i; - - first = bolist[0]; - - if (!LIST_IS_EMPTY(&first->fencedlist)) - LIST_DELINIT(&first->fencedlist); - - LIST_INITHEAD(&first->fencedlist); - - for (i = 1; i < num_bo; i++) { - if (!LIST_IS_EMPTY(&bolist[i]->fencedlist)) - LIST_DELINIT(&bolist[i]->fencedlist); - - LIST_ADDTAIL(&bolist[i]->fencedlist, &first->fencedlist); - } - return 0; -} -- 2.30.2