   return &cs->compute_ib.base;
}
+static bool
+amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
+                           unsigned preamble_num_dw)
+{
+   struct amdgpu_ib *ib = amdgpu_ib(rcs);
+   struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib);
+   struct amdgpu_winsys *ws = cs->ctx->ws;
+   struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2};
+   unsigned size = align(preamble_num_dw * 4, ws->info.ib_alignment);
+   struct pb_buffer *preamble_bo;
+   uint32_t *map;
+
+   /* Create the preamble IB buffer. */
+   preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment,
+                                  RADEON_DOMAIN_VRAM,
+                                  RADEON_FLAG_NO_INTERPROCESS_SHARING |
+                                  RADEON_FLAG_GTT_WC |
+                                  RADEON_FLAG_READ_ONLY);
+   if (!preamble_bo)
+      return false;
+
+   map = (uint32_t*)amdgpu_bo_map(preamble_bo, NULL,
+                                  PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
+   if (!map) {
+      pb_reference(&preamble_bo, NULL);
+      return false;
+   }
+
+   /* Upload the preamble IB. */
+   memcpy(map, preamble_ib, preamble_num_dw * 4);
+
+   /* Pad the IB. */
+   uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type];
+   while (preamble_num_dw & ib_pad_dw_mask)
+      map[preamble_num_dw++] = PKT3_NOP_PAD;
+   amdgpu_bo_unmap(preamble_bo);
+
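+   /* Attach the preamble to both CS contexts and flag the main IB as
+    * preemptible. Note that the preamble's ib_bytes is stored in bytes,
+    * while the main IB's ib_bytes stays in dwords until submit.
+    */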
+   for (unsigned i = 0; i < 2; i++) {
+      csc[i]->ib[IB_PREAMBLE] = csc[i]->ib[IB_MAIN];
+      csc[i]->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAG_PREAMBLE;
+      csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va;
+      csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4;
+
+      csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
+   }
+
+   assert(!cs->preamble_ib_bo);
+   cs->preamble_ib_bo = preamble_bo;
+
+   amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0,
+                        RADEON_PRIO_IB1);
+   return true;
+}
+
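For context, a minimal sketch of how a winsys client might use the new hook. It assumes a struct radeon_winsys pointer named ws and an existing gfx command buffer gfx_cs (both hypothetical names); the preamble contents are placeholders, not a real register-initialization sequence:

   /* Illustrative only: the driver builds the preamble itself and installs
    * it once, before the first flush of the command buffer.
    */
   uint32_t preamble[64];
   unsigned num_dw = 0;

   /* ... fill preamble[] with PM4 state-setup packets ... */

   bool ok = ws->cs_setup_preemption &&
             ws->cs_setup_preemption(gfx_cs, preamble, num_dw);
   /* If ok is false, the IBs are not flagged for preemption and rendering
    * continues without mid-IB preemption.
    */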
static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs)
{
   return true;
   if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) {
      r = -ECANCELED;
   } else {
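+      /* One entry is reserved for the optional preamble IB chunk. */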
-      struct drm_amdgpu_cs_chunk chunks[6];
+      struct drm_amdgpu_cs_chunk chunks[7];
      unsigned num_chunks = 0;
      /* BO list */
         num_chunks++;
      }
+      /* Preamble IB */
+      if (cs->ib[IB_PREAMBLE].ib_bytes) {
+         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
+         chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+         chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE];
+         num_chunks++;
+      }
+
      /* IB */
      cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */
      chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
      chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
      num_chunks++;
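+      /* Keep the preamble IB's secure flag in sync with the main IB. */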
-      if (ws->secure && cs->secure)
+      if (ws->secure && cs->secure) {
+         cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE;
         cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE;
-      else
+      } else {
+         cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE;
         cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE;
+      }
      assert(num_chunks <= ARRAY_SIZE(chunks));
   if (cs->compute_ib.ib_mapped)
      amdgpu_get_new_ib(ws, cs, IB_PARALLEL_COMPUTE);
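+   /* Re-add the preamble BO so it is part of the next submission's
+    * buffer list.
+    */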
+   if (cs->preamble_ib_bo) {
+      amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0,
+                           RADEON_PRIO_IB1);
+   }
+
   cs->main.base.used_gart = 0;
   cs->main.base.used_vram = 0;
   amdgpu_cs_sync_flush(rcs);
   util_queue_fence_destroy(&cs->flush_completed);
   p_atomic_dec(&cs->ctx->ws->num_cs);
+   pb_reference(&cs->preamble_ib_bo, NULL);
   pb_reference(&cs->main.big_ib_buffer, NULL);
   FREE(cs->main.base.prev);
   pb_reference(&cs->compute_ib.big_ib_buffer, NULL);
   ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
   ws->base.cs_create = amdgpu_cs_create;
   ws->base.cs_add_parallel_compute_ib = amdgpu_cs_add_parallel_compute_ib;
+   ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption;
   ws->base.cs_destroy = amdgpu_cs_destroy;
   ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
   ws->base.cs_validate = amdgpu_cs_validate;