X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fgallium%2Fwinsys%2Famdgpu%2Fdrm%2Famdgpu_cs.c;fp=src%2Fgallium%2Fwinsys%2Famdgpu%2Fdrm%2Famdgpu_cs.c;h=7f097c50cfc3ef48f22794e4430a4303111a242e;hp=05e77b03325856c78e43e82c712f6f2a23b6f479;hb=9e2113c6dc132707db19461b77c8001b5475156a;hpb=b8892bc81820884cd42ada8699d0c28cb8e39dda diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 05e77b03325..7f097c50cfc 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1034,6 +1034,60 @@ amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *ib, return &cs->compute_ib.base; } +/** Upload a preamble IB for this command stream and flag its main IB as preemptible. Creates a buffer, copies preamble_num_dw dwords from preamble_ib into it, pads it with PKT3_NOP_PAD, then for both CS contexts (csc1 and csc2) fills ib[IB_PREAMBLE] from ib[IB_MAIN] with AMDGPU_IB_FLAG_PREAMBLE set and the new buffer's address and size, and sets AMDGPU_IB_FLAG_PREEMPT on ib[IB_MAIN]. The buffer is stored in cs->preamble_ib_bo and added to the CS buffer list. \param rcs command buffer whose CS is configured. \param preamble_ib dwords to upload. \param preamble_num_dw number of dwords in preamble_ib. \return true on success, false if the buffer cannot be created or mapped. The assert below expects that no preamble buffer has been set up on this CS before. */ static bool +amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib, + unsigned preamble_num_dw) +{ + struct amdgpu_ib *ib = amdgpu_ib(rcs); + struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib); + struct amdgpu_winsys *ws = cs->ctx->ws; + struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2}; + unsigned size = align(preamble_num_dw * 4, ws->info.ib_alignment); + struct pb_buffer *preamble_bo; + uint32_t *map; + + /* Create the preamble IB buffer: a VRAM buffer of the aligned size, requested with the no-interprocess-sharing, GTT_WC and read-only flags. */ + preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment, + RADEON_DOMAIN_VRAM, + RADEON_FLAG_NO_INTERPROCESS_SHARING | + RADEON_FLAG_GTT_WC | + RADEON_FLAG_READ_ONLY); + if (!preamble_bo) + return false; + + map = (uint32_t*)amdgpu_bo_map(preamble_bo, NULL, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + if (!map) { /* Mapping failed: drop the just-created buffer before bailing out. */ + pb_reference(&preamble_bo, NULL); + return false; + } + + /* Upload the preamble IB: copy preamble_num_dw dwords from the caller's array into the mapped buffer. */ + memcpy(map, preamble_ib, preamble_num_dw * 4); + + /* Pad the IB. PKT3_NOP_PAD dwords are appended until the dword count has no bits set in ib_pad_dw_mask for this CS's ring type; preamble_num_dw then holds the padded count used for ib_bytes below. NOTE(review): these writes go past the first preamble_num_dw * 4 bytes and rely on the aligned buffer size covering the padded length; confirm ib_alignment is large enough for ib_pad_dw_mask. 
*/ + uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type]; + while (preamble_num_dw & ib_pad_dw_mask) + map[preamble_num_dw++] = PKT3_NOP_PAD; + amdgpu_bo_unmap(preamble_bo); + + for (unsigned i = 0; i < 2; i++) { /* Both CS contexts get an IB_PREAMBLE entry copied from IB_MAIN, flagged as preamble and pointed at the uploaded buffer; IB_MAIN is flagged for preemption. */ + csc[i]->ib[IB_PREAMBLE] = csc[i]->ib[IB_MAIN]; + csc[i]->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAG_PREAMBLE; + csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va; + csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4; + + csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT; + } + + assert(!cs->preamble_ib_bo); /* A preamble buffer must not already be set on this CS. */ + cs->preamble_ib_bo = preamble_bo; + + amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0, + RADEON_PRIO_IB1); + return true; +} + static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs) { return true; @@ -1446,7 +1500,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) { r = -ECANCELED; } else { - struct drm_amdgpu_cs_chunk chunks[6]; + struct drm_amdgpu_cs_chunk chunks[7]; /* One more slot than before, for the optional preamble IB chunk added below. */ unsigned num_chunks = 0; /* BO list */ @@ -1589,6 +1643,14 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) num_chunks++; } + /* Preamble IB: emitted as its own IB chunk ahead of the main IB chunk, and only when a preamble was set up (non-zero ib_bytes). */ + if (cs->ib[IB_PREAMBLE].ib_bytes) { + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB; + chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; + chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE]; + num_chunks++; + } + /* IB */ cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. 
*/ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB; @@ -1596,10 +1658,13 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN]; num_chunks++; - if (ws->secure && cs->secure) + if (ws->secure && cs->secure) { /* The secure flag of the preamble IB is set or cleared together with the main IB's. */ + cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE; cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE; - else + } else { + cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE; cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE; + } assert(num_chunks <= ARRAY_SIZE(chunks)); @@ -1784,6 +1849,11 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, if (cs->compute_ib.ib_mapped) amdgpu_get_new_ib(ws, cs, IB_PARALLEL_COMPUTE); + if (cs->preamble_ib_bo) { /* Add the preamble IB buffer to the buffer list again during flush. NOTE(review): presumably the buffer list starts over for the next IB; confirm against the rest of amdgpu_cs_flush. */ + amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0, + RADEON_PRIO_IB1); + } + cs->main.base.used_gart = 0; cs->main.base.used_vram = 0; @@ -1802,6 +1872,7 @@ static void amdgpu_cs_destroy(struct radeon_cmdbuf *rcs) amdgpu_cs_sync_flush(rcs); util_queue_fence_destroy(&cs->flush_completed); p_atomic_dec(&cs->ctx->ws->num_cs); + pb_reference(&cs->preamble_ib_bo, NULL); /* Drop the preamble IB buffer stored by amdgpu_cs_setup_preemption. */ pb_reference(&cs->main.big_ib_buffer, NULL); FREE(cs->main.base.prev); pb_reference(&cs->compute_ib.big_ib_buffer, NULL); @@ -1829,6 +1900,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws) ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status; ws->base.cs_create = amdgpu_cs_create; ws->base.cs_add_parallel_compute_ib = amdgpu_cs_add_parallel_compute_ib; + ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption; /* Hook the preemption setup entry point into ws->base. */ ws->base.cs_destroy = amdgpu_cs_destroy; ws->base.cs_add_buffer = amdgpu_cs_add_buffer; ws->base.cs_validate = amdgpu_cs_validate;