#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <unistd.h>
#include "radeon_priv.h"
#include "radeon_drm.h"
#include "bof.h"
-static int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo, unsigned state_id)
+static int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_ws_bo *bo)
{
- ctx->bo[ctx->nbo].bo = bo;
- ctx->bo[ctx->nbo].bo_flushed = 0;
- ctx->bo[ctx->nbo].state_id = state_id;
+ if (ctx->nbo >= RADEON_CTX_MAX_PM4)
+ return -EBUSY;
+ /* take a reference to the kernel bo */
+ radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->nbo], radeon_bo_pb_get_bo(bo->pb));
ctx->nbo++;
return 0;
}
-void radeon_ctx_clear(struct radeon_ctx *ctx)
+static void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement)
{
+ struct radeon_cs_reloc *greloc;
unsigned i;
- /* FIXME somethings is wrong, it should be safe to
- * delete bo here, kernel should postpone bo deletion
- * until bo is no longer referenced by cs (through the
- * fence association)
- */
- for (i = 0; i < 50; i++) {
- usleep(10);
- }
+ placement[0] = 0;
+ placement[1] = 0;
+ greloc = (void *)(((u8 *)ctx->reloc) + reloc * 4);
for (i = 0; i < ctx->nbo; i++) {
- ctx->bo[i].bo = radeon_bo_decref(ctx->radeon, ctx->bo[i].bo);
+ if (ctx->bo[i]->handle == greloc->handle) {
+ placement[0] = greloc->read_domain | greloc->write_domain;
+ placement[1] = placement[0];
+ return;
+ }
+ }
+}
+
+void radeon_ctx_clear(struct radeon_ctx *ctx)
+{
+ for (int i = 0; i < ctx->nbo; i++) {
+ radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
}
- ctx->id = 0;
- ctx->npm4 = RADEON_CTX_MAX_PM4;
+ ctx->ndwords = RADEON_CTX_MAX_PM4;
+ ctx->cdwords = 0;
ctx->nreloc = 0;
ctx->nbo = 0;
- memset(ctx->state_crc32, 0, ctx->radeon->nstate * 4);
}
-struct radeon_ctx *radeon_ctx(struct radeon *radeon)
+struct radeon_ctx *radeon_ctx_init(struct radeon *radeon)
{
struct radeon_ctx *ctx;
-
if (radeon == NULL)
return NULL;
- ctx = calloc(1, sizeof(*ctx));
- if (ctx == NULL)
- return NULL;
+ ctx = calloc(1, sizeof(struct radeon_ctx));
ctx->radeon = radeon_incref(radeon);
- ctx->max_bo = 4096;
- ctx->max_reloc = 4096;
+ radeon_ctx_clear(ctx);
ctx->pm4 = malloc(RADEON_CTX_MAX_PM4 * 4);
if (ctx->pm4 == NULL) {
- return radeon_ctx_decref(ctx);
+ radeon_ctx_fini(ctx);
+ return NULL;
}
- ctx->state_crc32 = malloc(ctx->radeon->nstate * 4);
- if (ctx->state_crc32 == NULL) {
- return radeon_ctx_decref(ctx);
+ ctx->reloc = malloc(sizeof(struct radeon_cs_reloc) * RADEON_CTX_MAX_PM4);
+ if (ctx->reloc == NULL) {
+ radeon_ctx_fini(ctx);
+ return NULL;
}
- ctx->bo = malloc(ctx->max_bo * sizeof(struct radeon_ctx_bo));
+ ctx->bo = calloc(sizeof(void *), RADEON_CTX_MAX_PM4);
if (ctx->bo == NULL) {
- return radeon_ctx_decref(ctx);
- }
- ctx->reloc = malloc(ctx->max_reloc * sizeof(struct radeon_cs_reloc));
- if (ctx->reloc == NULL) {
- return radeon_ctx_decref(ctx);
+ radeon_ctx_fini(ctx);
+ return NULL;
}
- radeon_ctx_clear(ctx);
return ctx;
}
-struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx)
+void radeon_ctx_fini(struct radeon_ctx *ctx)
{
- ctx->refcount++;
- return ctx;
-}
+ unsigned i;
-struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx)
-{
if (ctx == NULL)
- return NULL;
- if (--ctx->refcount > 0) {
- return NULL;
- }
+ return;
+ for (i = 0; i < ctx->nbo; i++) {
+ radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
+ }
ctx->radeon = radeon_decref(ctx->radeon);
free(ctx->bo);
free(ctx->pm4);
free(ctx->reloc);
- free(ctx->state_crc32);
- memset(ctx, 0, sizeof(*ctx));
free(ctx);
- return NULL;
-}
-
-static int radeon_ctx_bo_id(struct radeon_ctx *ctx, struct radeon_bo *bo)
-{
- unsigned i;
-
- for (i = 0; i < ctx->nbo; i++) {
- if (bo == ctx->bo[i].bo)
- return i;
- }
- return -1;
}
static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *state)
{
unsigned i, j;
int r;
-
+ struct radeon_bo *state_bo;
if (state == NULL)
return 0;
for (i = 0; i < state->nbo; i++) {
for (j = 0; j < ctx->nbo; j++) {
- if (state->bo[i] == ctx->bo[j].bo)
+ state_bo = radeon_bo_pb_get_bo(state->bo[i]->pb);
+ if (state_bo == ctx->bo[j])
break;
}
if (j == ctx->nbo) {
- if (ctx->nbo >= ctx->max_bo) {
- return -EBUSY;
- }
- radeon_bo_incref(ctx->radeon, state->bo[i]);
- r = radeon_ctx_set_bo_new(ctx, state->bo[i], state->id);
+ r = radeon_ctx_set_bo_new(ctx, state->bo[i]);
if (r)
return r;
}
return 0;
}
+
int radeon_ctx_submit(struct radeon_ctx *ctx)
{
struct drm_radeon_cs drmib;
uint64_t chunk_array[2];
int r = 0;
- if (!ctx->id)
+ if (!ctx->cdwords)
return 0;
+
+ radeon_bo_pbmgr_flush_maps(ctx->radeon->kman);
#if 0
- for (r = 0; r < ctx->id; r++) {
+ for (r = 0; r < ctx->cdwords; r++) {
fprintf(stderr, "0x%08X\n", ctx->pm4[r]);
}
#endif
drmib.num_chunks = 2;
drmib.chunks = (uint64_t)(uintptr_t)chunk_array;
chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
- chunks[0].length_dw = ctx->id;
+ chunks[0].length_dw = ctx->cdwords;
chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
chunks[1].length_dw = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4;
return r;
}
-int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo,
+static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_ws_bo *bo,
unsigned id, unsigned *placement)
{
unsigned i;
+ unsigned bo_handle = radeon_ws_bo_get_handle(bo);
for (i = 0; i < ctx->nreloc; i++) {
- if (ctx->reloc[i].handle == bo->handle) {
+ if (ctx->reloc[i].handle == bo_handle) {
ctx->pm4[id] = i * sizeof(struct radeon_cs_reloc) / 4;
return 0;
}
}
- if (ctx->nreloc >= ctx->max_reloc) {
+ if (ctx->nreloc >= RADEON_CTX_MAX_PM4) {
return -EBUSY;
}
- ctx->reloc[ctx->nreloc].handle = bo->handle;
+ ctx->reloc[ctx->nreloc].handle = bo_handle;
ctx->reloc[ctx->nreloc].read_domain = placement[0] | placement [1];
ctx->reloc[ctx->nreloc].write_domain = placement[0] | placement [1];
ctx->reloc[ctx->nreloc].flags = 0;
static int radeon_ctx_state_schedule(struct radeon_ctx *ctx, struct radeon_state *state)
{
- unsigned i, rid, cid;
- u32 flags;
- int r, bo_id[4];
+ unsigned i, rid, bid, cid;
+ int r;
if (state == NULL)
return 0;
- for (i = 0; i < state->nbo; i++) {
- bo_id[i] = radeon_ctx_bo_id(ctx, state->bo[i]);
- if (bo_id[i] < 0) {
- return -EINVAL;
- }
- flags = (~ctx->bo[bo_id[i]].bo_flushed) & ctx->radeon->type[state->id].flush_flags;
- if (flags) {
- r = ctx->radeon->bo_flush(ctx, state->bo[i], flags, &state->placement[i * 2]);
- if (r) {
- return r;
- }
- }
- ctx->bo[bo_id[i]].bo_flushed |= ctx->radeon->type[state->id].flush_flags;
- }
- if ((ctx->radeon->type[state->id].header_cpm4 + state->cpm4) > ctx->npm4) {
- /* need to flush */
+ if (state->cpm4 > ctx->ndwords) {
return -EBUSY;
}
- memcpy(&ctx->pm4[ctx->id], ctx->radeon->type[state->id].header_pm4, ctx->radeon->type[state->id].header_cpm4 * 4);
- ctx->id += ctx->radeon->type[state->id].header_cpm4;
- ctx->npm4 -= ctx->radeon->type[state->id].header_cpm4;
- memcpy(&ctx->pm4[ctx->id], state->states, state->cpm4 * 4);
- for (i = 0; i < state->nbo; i++) {
+ memcpy(&ctx->pm4[ctx->cdwords], state->pm4, state->cpm4 * 4);
+ for (i = 0; i < state->nreloc; i++) {
rid = state->reloc_pm4_id[i];
- cid = ctx->id + rid;
- r = radeon_ctx_reloc(ctx, state->bo[i], cid,
- &state->placement[i * 2]);
+ bid = state->reloc_bo_id[i];
+ cid = ctx->cdwords + rid;
+ r = radeon_ctx_reloc(ctx, state->bo[bid], cid,
+ &state->placement[bid * 2]);
if (r) {
- fprintf(stderr, "%s state %d failed to reloc\n", __func__, state->id);
+ fprintf(stderr, "%s state %d failed to reloc\n", __func__, state->stype->stype);
return r;
}
}
- ctx->id += state->cpm4;
- ctx->npm4 -= state->cpm4;
- for (i = 0; i < state->nbo; i++) {
- ctx->bo[bo_id[i]].bo_flushed &= ~ctx->radeon->type[state->id].dirty_flags;
- }
+ ctx->cdwords += state->cpm4;
+ ctx->ndwords -= state->cpm4;
return 0;
}
int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state)
{
- unsigned ndw = 0;
int r = 0;
+ /* !!! ONLY ACCEPT QUERY STATE HERE !!! */
r = radeon_state_pm4(state);
if (r)
return r;
-
- /* !!! ONLY ACCEPT QUERY STATE HERE !!! */
- ndw = state->cpm4 + ctx->radeon->type[state->id].header_cpm4;
- switch (state->id) {
- case R600_QUERY_BEGIN:
- /* account QUERY_END at same time of QUERY_BEGIN so we know we
- * have room left for QUERY_END
- */
- if ((ndw * 2) > ctx->npm4) {
- /* need to flush */
+ /* BEGIN/END query are balanced in the same cs so account for END
+ * END query when scheduling BEGIN query
+ */
+ switch (state->stype->stype) {
+ case R600_STATE_QUERY_BEGIN:
+ /* is there enough place for begin & end */
+ if ((state->cpm4 * 2) > ctx->ndwords)
return -EBUSY;
- }
- ctx->npm4 -= ndw;
+ ctx->ndwords -= state->cpm4;
break;
- case R600_QUERY_END:
- /* add again ndw from previous accounting */
- ctx->npm4 += ndw;
+ case R600_STATE_QUERY_END:
+ ctx->ndwords += state->cpm4;
break;
default:
return -EINVAL;
}
-
return radeon_ctx_state_schedule(ctx, state);
}
int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw)
{
- unsigned i, previous_id;
+ unsigned previous_cdwords;
int r = 0;
+ int i;
- for (i = 0; i < draw->nstate; i++) {
+ for (i = 0; i < ctx->radeon->max_states; i++) {
r = radeon_ctx_state_bo(ctx, draw->state[i]);
if (r)
return r;
}
- r = radeon_draw_check(draw);
- if (r)
- return r;
- if (draw->cpm4 >= RADEON_CTX_MAX_PM4) {
- fprintf(stderr, "%s single draw too big %d, max %d\n",
- __func__, draw->cpm4, RADEON_CTX_MAX_PM4);
- return -EINVAL;
- }
- previous_id = ctx->id;
- for (i = 0; i < draw->nstate; i++) {
- /* FIXME always force draw state to schedule */
- if (draw->state[i] && draw->state[i]->pm4_crc != ctx->state_crc32[draw->state[i]->id]) {
+ previous_cdwords = ctx->cdwords;
+ for (i = 0; i < ctx->radeon->max_states; i++) {
+ if (draw->state[i]) {
r = radeon_ctx_state_schedule(ctx, draw->state[i]);
if (r) {
- ctx->id = previous_id;
+ ctx->cdwords = previous_cdwords;
return r;
}
}
}
+
return 0;
}
+#if 0
+int radeon_ctx_pm4(struct radeon_ctx *ctx)
+{
+ unsigned i;
+ int r;
+
+ free(ctx->pm4);
+ ctx->cpm4 = 0;
+ ctx->pm4 = malloc(ctx->draw_cpm4 * 4);
+ if (ctx->pm4 == NULL)
+ return -EINVAL;
+ for (i = 0, ctx->id = 0; i < ctx->nstate; i++) {
+ }
+ if (ctx->id != ctx->draw_cpm4) {
+ fprintf(stderr, "%s miss predicted pm4 size %d for %d\n",
+ __func__, ctx->draw_cpm4, ctx->id);
+ return -EINVAL;
+ }
+ ctx->cpm4 = ctx->draw_cpm4;
+ return 0;
+}
+#endif
+
void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file)
{
bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root;
unsigned i;
-
+ unsigned bo_size;
root = device_id = bcs = blob = array = bo = size = handle = NULL;
root = bof_object();
if (root == NULL)
bof_decref(device_id);
device_id = NULL;
/* dump relocs */
-printf("%d relocs\n", ctx->nreloc);
blob = bof_blob(ctx->nreloc * 16, ctx->reloc);
if (blob == NULL)
goto out_err;
bof_decref(blob);
blob = NULL;
/* dump cs */
-printf("%d pm4\n", ctx->id);
- blob = bof_blob(ctx->id * 4, ctx->pm4);
+ blob = bof_blob(ctx->cdwords * 4, ctx->pm4);
if (blob == NULL)
goto out_err;
if (bof_object_set(root, "pm4", blob))
bo = bof_object();
if (bo == NULL)
goto out_err;
- size = bof_int32(ctx->bo[i].bo->size);
+ bo_size = ctx->bo[i]->size;
+ size = bof_int32(bo_size);
if (size == NULL)
goto out_err;
if (bof_object_set(bo, "size", size))
goto out_err;
bof_decref(size);
size = NULL;
- handle = bof_int32(ctx->bo[i].bo->handle);
+ handle = bof_int32(ctx->bo[i]->handle);
if (handle == NULL)
goto out_err;
if (bof_object_set(bo, "handle", handle))
goto out_err;
bof_decref(handle);
handle = NULL;
- radeon_bo_map(ctx->radeon, ctx->bo[i].bo);
- blob = bof_blob(ctx->bo[i].bo->size, ctx->bo[i].bo->data);
- radeon_bo_unmap(ctx->radeon, ctx->bo[i].bo);
+ radeon_bo_map(ctx->radeon, ctx->bo[i]);
+ blob = bof_blob(bo_size, ctx->bo[i]->data);
+ radeon_bo_unmap(ctx->radeon, ctx->bo[i]);
if (blob == NULL)
goto out_err;
if (bof_object_set(bo, "data", blob))
if (bof_object_set(root, "bo", array))
goto out_err;
bof_dump_file(root, file);
-printf("done dump\n");
out_err:
bof_decref(blob);
bof_decref(array);