From: Marek Olšák
Date: Sat, 8 Aug 2015 11:27:38 +0000 (+0200)
Subject: winsys/amdgpu: split IB data into a new structure in preparation for CE
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e78170f388fdabf3b981839ae265632b974f5569;p=mesa.git

winsys/amdgpu: split IB data into a new structure in preparation for CE

Reviewed-by: Bas Nieuwenhuizen
---
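The function being refactored here, amdgpu_get_new_ib(), is a bump
suballocator: many small IBs are carved out of one large, CPU-mapped GTT
buffer (big_ib_buffer), and a new backing buffer is allocated only when the
current one cannot fit another maximum-size IB. The standalone sketch below
restates that pattern outside the winsys; it is illustrative only: struct
ib_pool and this get_new_ib() are invented for the example, and malloc/free
stand in for ws->buffer_create, ws->buffer_map and pb_reference.

    #include <stdint.h>
    #include <stdlib.h>

    #define BIG_BUFFER_SIZE (128 * 1024 * 4)   /* buffer_size in the patch */
    #define IB_SIZE         (20 * 1024 * 4)    /* ib_size in the patch */

    /* Invented stand-in for struct amdgpu_ib. */
    struct ib_pool {
       uint8_t *mapped;         /* CPU mapping of the backing buffer */
       unsigned size;           /* total size of the backing buffer */
       unsigned used_ib_space;  /* bytes consumed by earlier IBs */
    };

    /* Return a write pointer for the next IB, replacing the backing buffer
     * when it cannot fit another maximum-size IB; NULL on allocation failure. */
    static uint32_t *get_new_ib(struct ib_pool *pool)
    {
       if (!pool->mapped || pool->used_ib_space + IB_SIZE > pool->size) {
          free(pool->mapped);                 /* pb_reference(..., NULL) */
          pool->mapped = malloc(BIG_BUFFER_SIZE);
          if (!pool->mapped)
             return NULL;
          pool->size = BIG_BUFFER_SIZE;
          pool->used_ib_space = 0;
       }
       return (uint32_t *)(pool->mapped + pool->used_ib_space);
    }

In the winsys itself the offset advances at flush time by the dword count
actually written (cs->main.used_ib_space += cs->main.base.cdw * 4 below), so
consecutive IBs pack tightly into the big buffer.
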
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 1b2793a5d6b..036301ef089 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -38,11 +38,6 @@
 #include <stdio.h>
 #include <inttypes.h>

-static inline struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
-{
-   return (struct amdgpu_winsys_bo *)bo;
-}
-
 static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                            enum radeon_bo_usage usage)
 {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 54f5dbdc459..69ada10cc37 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -68,6 +68,12 @@
 bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf);
 void amdgpu_bo_destroy(struct pb_buffer *_buf);
 void amdgpu_bo_init_functions(struct amdgpu_winsys *ws);

+static inline
+struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
+{
+   return (struct amdgpu_winsys_bo *)bo;
+}
+
 static inline void amdgpu_winsys_bo_reference(struct amdgpu_winsys_bo **dst,
                                               struct amdgpu_winsys_bo *src)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 63c72fcf003..b0fe8b91b1c 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -198,7 +198,8 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)

 /* COMMAND SUBMISSION */

-static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
+static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_ib *ib,
+                              struct amdgpu_cs_ib_info *info)
 {
    /* Small IBs are better than big IBs, because the GPU goes idle quicker
     * and there is less waiting for buffers and fences. Proof:
@@ -207,39 +208,36 @@
    const unsigned buffer_size = 128 * 1024 * 4;
    const unsigned ib_size = 20 * 1024 * 4;

-   cs->base.cdw = 0;
-   cs->base.buf = NULL;
+   ib->base.cdw = 0;
+   ib->base.buf = NULL;

    /* Allocate a new buffer for IBs if the current buffer is all used. */
-   if (!cs->big_ib_buffer ||
-       cs->used_ib_space + ib_size > cs->big_ib_buffer->size) {
-      struct radeon_winsys *ws = &cs->ctx->ws->base;
+   if (!ib->big_ib_buffer ||
+       ib->used_ib_space + ib_size > ib->big_ib_buffer->size) {

-      pb_reference(&cs->big_ib_buffer, NULL);
-      cs->big_ib_winsys_buffer = NULL;
-      cs->ib_mapped = NULL;
-      cs->used_ib_space = 0;
+      pb_reference(&ib->big_ib_buffer, NULL);
+      ib->ib_mapped = NULL;
+      ib->used_ib_space = 0;

-      cs->big_ib_buffer = ws->buffer_create(ws, buffer_size,
+      ib->big_ib_buffer = ws->buffer_create(ws, buffer_size,
                                             4096, true,
                                             RADEON_DOMAIN_GTT,
                                             RADEON_FLAG_CPU_ACCESS);
-      if (!cs->big_ib_buffer)
+      if (!ib->big_ib_buffer)
         return false;

-      cs->ib_mapped = ws->buffer_map(cs->big_ib_buffer, NULL,
+      ib->ib_mapped = ws->buffer_map(ib->big_ib_buffer, NULL,
                                      PIPE_TRANSFER_WRITE);
-      if (!cs->ib_mapped) {
-         pb_reference(&cs->big_ib_buffer, NULL);
+      if (!ib->ib_mapped) {
+         pb_reference(&ib->big_ib_buffer, NULL);
         return false;
      }
-
-      cs->big_ib_winsys_buffer = (struct amdgpu_winsys_bo*)cs->big_ib_buffer;
   }

-   cs->ib.ib_mc_address = cs->big_ib_winsys_buffer->va + cs->used_ib_space;
-   cs->base.buf = (uint32_t*)(cs->ib_mapped + cs->used_ib_space);
-   cs->base.max_dw = ib_size / 4;
+   info->ib_mc_address = amdgpu_winsys_bo(ib->big_ib_buffer)->va +
+                         ib->used_ib_space;
+   ib->base.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);
+   ib->base.max_dw = ib_size / 4;

    return true;
 }
@@ -271,9 +269,6 @@ static boolean amdgpu_init_cs_context(struct amdgpu_cs *cs,
       break;
    }

-   cs->request.number_of_ibs = 1;
-   cs->request.ibs = &cs->ib;
-
    cs->max_num_buffers = 512;
    cs->buffers = (struct amdgpu_cs_buffer*)
                  CALLOC(1, cs->max_num_buffers * sizeof(struct amdgpu_cs_buffer));
@@ -355,14 +350,17 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
       return NULL;
    }

-   if (!amdgpu_get_new_ib(cs)) {
+   if (!amdgpu_get_new_ib(&ctx->ws->base, &cs->main, &cs->ib)) {
       amdgpu_destroy_cs_context(cs);
       FREE(cs);
       return NULL;
    }

+   cs->request.number_of_ibs = 1;
+   cs->request.ibs = &cs->ib;
+
    p_atomic_inc(&ctx->ws->num_cs);
-   return &cs->base;
+   return &cs->main.base;
 }

 #define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
@@ -617,16 +615,16 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
    case RING_DMA:
       /* pad DMA ring to 8 DWs */
       while (rcs->cdw & 7)
-         OUT_CS(&cs->base, 0x00000000); /* NOP packet */
+         OUT_CS(rcs, 0x00000000); /* NOP packet */
       break;
    case RING_GFX:
       /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
       while (rcs->cdw & 7)
-         OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
+         OUT_CS(rcs, 0xffff1000); /* type3 nop packet */
       break;
    case RING_UVD:
       while (rcs->cdw & 15)
-         OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
+         OUT_CS(rcs, 0x80000000); /* type2 nop packet */
       break;
    default:
       break;
@@ -636,11 +634,11 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
       fprintf(stderr, "amdgpu: command stream overflowed\n");
    }

-   amdgpu_cs_add_buffer(rcs, (void*)cs->big_ib_winsys_buffer,
-                        RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
+   amdgpu_cs_add_buffer(rcs, cs->main.big_ib_buffer,
+                        RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);

    /* If the CS is not empty or overflowed.... */
-   if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
+   if (cs->main.base.cdw && cs->main.base.cdw <= cs->main.base.max_dw && !debug_get_option_noop()) {
       int r;

       /* Use a buffer list containing all allocated buffers if requested. */
@@ -679,8 +677,8 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
          goto cleanup;
       }

-      cs->ib.size = cs->base.cdw;
-      cs->used_ib_space += cs->base.cdw * 4;
+      cs->ib.size = cs->main.base.cdw;
+      cs->main.used_ib_space += cs->main.base.cdw * 4;

       amdgpu_cs_do_submission(cs, fence);

@@ -691,7 +689,7 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,

 cleanup:
    amdgpu_cs_context_cleanup(cs);
-   amdgpu_get_new_ib(cs);
+   amdgpu_get_new_ib(&ws->base, &cs->main, &cs->ib);

    ws->num_cs_flushes++;
 }
@@ -702,7 +700,7 @@ static void amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)

    amdgpu_destroy_cs_context(cs);
    p_atomic_dec(&cs->ctx->ws->num_cs);
-   pb_reference(&cs->big_ib_buffer, NULL);
+   pb_reference(&cs->main.big_ib_buffer, NULL);
    FREE(cs);
 }

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index f4709e9c325..888b85b77b1 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -50,21 +50,23 @@ struct amdgpu_cs_buffer {
    enum radeon_bo_domain domains;
 };

+struct amdgpu_ib {
+   struct radeon_winsys_cs base;
+
+   /* A buffer out of which new IBs are allocated. */
+   struct pb_buffer *big_ib_buffer;
+   uint8_t *ib_mapped;
+   unsigned used_ib_space;
+};

 struct amdgpu_cs {
-   struct radeon_winsys_cs base;
+   struct amdgpu_ib main; /* must be first because this is inherited */
    struct amdgpu_ctx *ctx;

    /* Flush CS. */
    void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
    void *flush_data;

-   /* A buffer out of which new IBs are allocated. */
-   struct pb_buffer *big_ib_buffer; /* for holding the reference */
-   struct amdgpu_winsys_bo *big_ib_winsys_buffer;
-   uint8_t *ib_mapped;
-   unsigned used_ib_space;
-
    /* amdgpu_cs_submit parameters */
    enum ring_type ring_type;
    struct amdgpu_cs_request request;
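
A note on the layout change in struct amdgpu_cs: the /* must be first because
this is inherited */ comment is load-bearing. amdgpu_cs_create() hands out
&cs->main.base as a struct radeon_winsys_cs *, and the winsys later casts such
pointers back to struct amdgpu_cs *, which is valid only while main (and its
own first member base) sit at offset 0. A minimal sketch of that invariant,
with the structs reduced to invented stubs rather than the real definitions:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Invented stubs; the real definitions live in radeon_winsys.h
     * and amdgpu_cs.h. */
    struct radeon_winsys_cs { unsigned cdw, max_dw; uint32_t *buf; };
    struct amdgpu_ib        { struct radeon_winsys_cs base; };
    struct amdgpu_cs        { struct amdgpu_ib main; /* must be first */ };

    int main(void)
    {
       struct amdgpu_cs cs;

       /* What amdgpu_cs_create() returns after this patch: */
       struct radeon_winsys_cs *rcs = &cs.main.base;

       /* What the winsys does with the pointer on the way back in: */
       struct amdgpu_cs *back = (struct amdgpu_cs *)rcs;

       /* Both casts are valid only while 'main' stays the first member. */
       assert(offsetof(struct amdgpu_cs, main) == 0);
       assert(back == &cs);
       return 0;
    }

This is also why amdgpu_cs_flush() and amdgpu_cs_destroy() can keep taking a
struct radeon_winsys_cs * while reaching fields such as cs->main.big_ib_buffer.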