draw: corrections to allow for different cliptest cases

[mesa.git] / src / gallium / winsys / r600 / drm / radeon_ctx.c
diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c

index b8ba9b552dfab744f202afdc7e2804173e793fc9..7ccb52459055729b564f3916619e3a5a3cd3bb3d 100644 (file)
--- a/src/gallium/winsys/r600/drm/radeon_ctx.c
+++ b/src/gallium/winsys/r600/drm/radeon_ctx.c
@@ -26,127 +26,106 @@
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
-#include <unistd.h>
  #include "radeon_priv.h"
  #include "radeon_drm.h"
  #include "bof.h"
  
-static int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo, unsigned state_id)
+static int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_ws_bo *bo)
  {
-       ctx->bo[ctx->nbo].bo = bo;
-       ctx->bo[ctx->nbo].bo_flushed = 0;
-       ctx->bo[ctx->nbo].state_id = state_id;
+       if (ctx->nbo >= RADEON_CTX_MAX_PM4)
+               return -EBUSY;
+       /* take a reference to the kernel bo */
+       radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->nbo], radeon_bo_pb_get_bo(bo->pb));
         ctx->nbo++;
         return 0;
  }
  
-void radeon_ctx_clear(struct radeon_ctx *ctx)
+static void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement)
  {
+       struct radeon_cs_reloc *greloc;
         unsigned i;
  
-       /* FIXME somethings is wrong, it should be safe to
-        * delete bo here, kernel should postpone bo deletion
-        * until bo is no longer referenced by cs (through the
-        * fence association)
-        */
-       for (i = 0; i < 50; i++) {
-               usleep(10);
-       }
+       placement[0] = 0;
+       placement[1] = 0;
+       greloc = (void *)(((u8 *)ctx->reloc) + reloc * 4);
         for (i = 0; i < ctx->nbo; i++) {
-               ctx->bo[i].bo = radeon_bo_decref(ctx->radeon, ctx->bo[i].bo);
+               if (ctx->bo[i]->handle == greloc->handle) {
+                       placement[0] = greloc->read_domain | greloc->write_domain;
+                       placement[1] = placement[0];
+                       return;
+               }
+       }
+}
+
+void radeon_ctx_clear(struct radeon_ctx *ctx)
+{
+       for (int i = 0; i < ctx->nbo; i++) {
+               radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
         }
-       ctx->id = 0;
-       ctx->npm4 = RADEON_CTX_MAX_PM4;
+       ctx->ndwords = RADEON_CTX_MAX_PM4;
+       ctx->cdwords = 0;
         ctx->nreloc = 0;
         ctx->nbo = 0;
-       memset(ctx->state_crc32, 0, ctx->radeon->nstate * 4);
  }
  
-struct radeon_ctx *radeon_ctx(struct radeon *radeon)
+struct radeon_ctx *radeon_ctx_init(struct radeon *radeon)
  {
         struct radeon_ctx *ctx;
-
         if (radeon == NULL)
                 return NULL;
-       ctx = calloc(1, sizeof(*ctx));
-       if (ctx == NULL)
-               return NULL;
+       ctx = calloc(1, sizeof(struct radeon_ctx));
         ctx->radeon = radeon_incref(radeon);
-       ctx->max_bo = 4096;
-       ctx->max_reloc = 4096;
+       radeon_ctx_clear(ctx);
         ctx->pm4 = malloc(RADEON_CTX_MAX_PM4 * 4);
         if (ctx->pm4 == NULL) {
-               return radeon_ctx_decref(ctx);
+               radeon_ctx_fini(ctx);
+               return NULL;
         }
-       ctx->state_crc32 = malloc(ctx->radeon->nstate * 4);
-       if (ctx->state_crc32 == NULL) {
-               return radeon_ctx_decref(ctx);
+       ctx->reloc = malloc(sizeof(struct radeon_cs_reloc) * RADEON_CTX_MAX_PM4);
+       if (ctx->reloc == NULL) {
+               radeon_ctx_fini(ctx);
+               return NULL;
         }
-       ctx->bo = malloc(ctx->max_bo * sizeof(struct radeon_ctx_bo));
+       ctx->bo = calloc(sizeof(void *), RADEON_CTX_MAX_PM4);
         if (ctx->bo == NULL) {
-               return radeon_ctx_decref(ctx);
-       }
-       ctx->reloc = malloc(ctx->max_reloc * sizeof(struct radeon_cs_reloc));
-       if (ctx->reloc == NULL) {
-               return radeon_ctx_decref(ctx);
+               radeon_ctx_fini(ctx);
+               return NULL;
         }
-       radeon_ctx_clear(ctx);
         return ctx;
  }
  
-struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx)
+void radeon_ctx_fini(struct radeon_ctx *ctx)
  {
-       ctx->refcount++;
-       return ctx;
-}
+       unsigned i;
  
-struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx)
-{
         if (ctx == NULL)
-               return NULL;
-       if (--ctx->refcount > 0) {
-               return NULL;
-       }
+               return;
  
+       for (i = 0; i < ctx->nbo; i++) {
+               radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
+       }
         ctx->radeon = radeon_decref(ctx->radeon);
         free(ctx->bo);
         free(ctx->pm4);
         free(ctx->reloc);
-       free(ctx->state_crc32);
-       memset(ctx, 0, sizeof(*ctx));
         free(ctx);
-       return NULL;
-}
-
-static int radeon_ctx_bo_id(struct radeon_ctx *ctx, struct radeon_bo *bo)
-{
-       unsigned i;
-
-       for (i = 0; i < ctx->nbo; i++) {
-               if (bo == ctx->bo[i].bo)
-                       return i;
-       }
-       return -1;
  }
  
  static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *state)
  {
         unsigned i, j;
         int r;
-
+       struct radeon_bo *state_bo;
         if (state == NULL)
                 return 0;
         for (i = 0; i < state->nbo; i++) {
                 for (j = 0; j < ctx->nbo; j++) {
-                       if (state->bo[i] == ctx->bo[j].bo)
+                       state_bo = radeon_bo_pb_get_bo(state->bo[i]->pb);
+                       if (state_bo == ctx->bo[j])
                                 break;
                 }
                 if (j == ctx->nbo) {
-                       if (ctx->nbo >= ctx->max_bo) {
-                               return -EBUSY;
-                       }
-                       radeon_bo_incref(ctx->radeon, state->bo[i]);
-                       r = radeon_ctx_set_bo_new(ctx, state->bo[i], state->id);
+                       r = radeon_ctx_set_bo_new(ctx, state->bo[i]);
                         if (r)
                                 return r;
                 }
@@ -154,6 +133,7 @@ static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *stat
         return 0;
  }
  
+
  int radeon_ctx_submit(struct radeon_ctx *ctx)
  {
         struct drm_radeon_cs drmib;
@@ -161,17 +141,19 @@ int radeon_ctx_submit(struct radeon_ctx *ctx)
         uint64_t chunk_array[2];
         int r = 0;
  
-       if (!ctx->id)
+       if (!ctx->cdwords)
                 return 0;
+
+       radeon_bo_pbmgr_flush_maps(ctx->radeon->kman);
  #if 0
-       for (r = 0; r < ctx->id; r++) {
+       for (r = 0; r < ctx->cdwords; r++) {
                 fprintf(stderr, "0x%08X\n", ctx->pm4[r]);
         }
  #endif
         drmib.num_chunks = 2;
         drmib.chunks = (uint64_t)(uintptr_t)chunk_array;
         chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
-       chunks[0].length_dw = ctx->id;
+       chunks[0].length_dw = ctx->cdwords;
         chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
         chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
         chunks[1].length_dw = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4;
@@ -185,21 +167,22 @@ int radeon_ctx_submit(struct radeon_ctx *ctx)
         return r;
  }
  
-int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo,
+static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_ws_bo *bo,
                         unsigned id, unsigned *placement)
  {
         unsigned i;
+       unsigned bo_handle = radeon_ws_bo_get_handle(bo);
  
         for (i = 0; i < ctx->nreloc; i++) {
-               if (ctx->reloc[i].handle == bo->handle) {
+               if (ctx->reloc[i].handle == bo_handle) {
                         ctx->pm4[id] = i * sizeof(struct radeon_cs_reloc) / 4;
                         return 0;
                 }
         }
-       if (ctx->nreloc >= ctx->max_reloc) {
+       if (ctx->nreloc >= RADEON_CTX_MAX_PM4) {
                 return -EBUSY;
         }
-       ctx->reloc[ctx->nreloc].handle = bo->handle;
+       ctx->reloc[ctx->nreloc].handle = bo_handle;
         ctx->reloc[ctx->nreloc].read_domain = placement[0] | placement [1];
         ctx->reloc[ctx->nreloc].write_domain = placement[0] | placement [1];
         ctx->reloc[ctx->nreloc].flags = 0;
@@ -210,122 +193,111 @@ int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo,
  
  static int radeon_ctx_state_schedule(struct radeon_ctx *ctx, struct radeon_state *state)
  {
-       unsigned i, rid, cid;
-       u32 flags;
-       int r, bo_id[4];
+       unsigned i, rid, bid, cid;
+       int r;
  
         if (state == NULL)
                 return 0;
-       for (i = 0; i < state->nbo; i++) {
-               bo_id[i] = radeon_ctx_bo_id(ctx, state->bo[i]);
-               if (bo_id[i] < 0) {
-                       return -EINVAL;
-               }
-               flags = (~ctx->bo[bo_id[i]].bo_flushed) & ctx->radeon->type[state->id].flush_flags;
-               if (flags) {
-                       r = ctx->radeon->bo_flush(ctx, state->bo[i], flags, &state->placement[i * 2]);
-                       if (r) {
-                               return r;
-                       }
-               }
-               ctx->bo[bo_id[i]].bo_flushed |= ctx->radeon->type[state->id].flush_flags;
-       }
-       if ((ctx->radeon->type[state->id].header_cpm4 + state->cpm4) > ctx->npm4) {
-               /* need to flush */
+       if (state->cpm4 > ctx->ndwords) {
                 return -EBUSY;
         }
-       memcpy(&ctx->pm4[ctx->id], ctx->radeon->type[state->id].header_pm4, ctx->radeon->type[state->id].header_cpm4 * 4);
-       ctx->id += ctx->radeon->type[state->id].header_cpm4;
-       ctx->npm4 -= ctx->radeon->type[state->id].header_cpm4;
-       memcpy(&ctx->pm4[ctx->id], state->states, state->cpm4 * 4);
-       for (i = 0; i < state->nbo; i++) {
+       memcpy(&ctx->pm4[ctx->cdwords], state->pm4, state->cpm4 * 4);
+       for (i = 0; i < state->nreloc; i++) {
                 rid = state->reloc_pm4_id[i];
-               cid = ctx->id + rid;
-               r = radeon_ctx_reloc(ctx, state->bo[i], cid,
-                                       &state->placement[i * 2]);
+               bid = state->reloc_bo_id[i];
+               cid = ctx->cdwords + rid;
+               r = radeon_ctx_reloc(ctx, state->bo[bid], cid,
+                                       &state->placement[bid * 2]);
                 if (r) {
-                       fprintf(stderr, "%s state %d failed to reloc\n", __func__, state->id);
+                       fprintf(stderr, "%s state %d failed to reloc\n", __func__, state->stype->stype);
                         return r;
                 }
         }
-       ctx->id += state->cpm4;
-       ctx->npm4 -= state->cpm4;
-       for (i = 0; i < state->nbo; i++) {
-               ctx->bo[bo_id[i]].bo_flushed &= ~ctx->radeon->type[state->id].dirty_flags;
-       }
+       ctx->cdwords += state->cpm4;
+       ctx->ndwords -= state->cpm4;
         return 0;
  }
  
  int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state)
  {
-       unsigned ndw = 0;
         int r = 0;
  
+       /* !!! ONLY ACCEPT QUERY STATE HERE !!! */
         r = radeon_state_pm4(state);
         if (r)
                 return r;
-
-       /* !!! ONLY ACCEPT QUERY STATE HERE !!! */
-       ndw = state->cpm4 + ctx->radeon->type[state->id].header_cpm4;
-       switch (state->id) {
-       case R600_QUERY_BEGIN:
-               /* account QUERY_END at same time of QUERY_BEGIN so we know we
-                * have room left for QUERY_END
-                */
-               if ((ndw * 2) > ctx->npm4) {
-                       /* need to flush */
+       /* BEGIN/END queries are balanced within the same cs, so reserve room
+        * for the END query when scheduling the BEGIN query
+        */
+       switch (state->stype->stype) {
+       case R600_STATE_QUERY_BEGIN:
+               /* is there enough room left for both begin & end */
+               if ((state->cpm4 * 2) > ctx->ndwords)
                         return -EBUSY;
-               }
-               ctx->npm4 -= ndw;
+               ctx->ndwords -= state->cpm4;
                 break;
-       case R600_QUERY_END:
-               /* add again ndw from previous accounting */
-               ctx->npm4 += ndw;
+       case R600_STATE_QUERY_END:
+               ctx->ndwords += state->cpm4;
                 break;
         default:
                 return -EINVAL;
         }
-
         return radeon_ctx_state_schedule(ctx, state);
  }
  
  int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw)
  {
-       unsigned i, previous_id;
+       unsigned previous_cdwords;
         int r = 0;
+       int i;
  
-       for (i = 0; i < draw->nstate; i++) {
+       for (i = 0; i < ctx->radeon->max_states; i++) {
                 r = radeon_ctx_state_bo(ctx, draw->state[i]);
                 if (r)
                         return r;
         }
-       r = radeon_draw_check(draw);
-       if (r)
-               return r;
-       if (draw->cpm4 >= RADEON_CTX_MAX_PM4) {
-               fprintf(stderr, "%s single draw too big %d, max %d\n",
-                       __func__, draw->cpm4, RADEON_CTX_MAX_PM4);
-               return -EINVAL;
-       }
-       previous_id = ctx->id;
-       for (i = 0; i < draw->nstate; i++) {
-               /* FIXME always force draw state to schedule */
-               if (draw->state[i] && draw->state[i]->pm4_crc != ctx->state_crc32[draw->state[i]->id]) {
+       previous_cdwords = ctx->cdwords;
+       for (i = 0; i < ctx->radeon->max_states; i++) {
+               if (draw->state[i]) {
                         r = radeon_ctx_state_schedule(ctx, draw->state[i]);
                         if (r) {
-                               ctx->id = previous_id;
+                               ctx->cdwords = previous_cdwords;
                                 return r;
                         }
                 }
         }
+
         return 0;
  }
  
+#if 0
+int radeon_ctx_pm4(struct radeon_ctx *ctx)
+{
+       unsigned i;
+       int r;
+
+       free(ctx->pm4);
+       ctx->cpm4 = 0;
+       ctx->pm4 = malloc(ctx->draw_cpm4 * 4);
+       if (ctx->pm4 == NULL)
+               return -EINVAL;
+       for (i = 0, ctx->id = 0; i < ctx->nstate; i++) {
+       }
+       if (ctx->id != ctx->draw_cpm4) {
+               fprintf(stderr, "%s miss predicted pm4 size %d for %d\n",
+                       __func__, ctx->draw_cpm4, ctx->id);
+               return -EINVAL;
+       }
+       ctx->cpm4 = ctx->draw_cpm4;
+       return 0;
+}
+#endif
+
  void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file)
  {
         bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root;
         unsigned i;
-
+       unsigned bo_size;
         root = device_id = bcs = blob = array = bo = size = handle = NULL;
         root = bof_object();
         if (root == NULL)
@@ -338,7 +310,6 @@ void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file)
         bof_decref(device_id);
         device_id = NULL;
         /* dump relocs */
-printf("%d relocs\n", ctx->nreloc);
         blob = bof_blob(ctx->nreloc * 16, ctx->reloc);
         if (blob == NULL)
                 goto out_err;
@@ -347,8 +318,7 @@ printf("%d relocs\n", ctx->nreloc);
         bof_decref(blob);
         blob = NULL;
         /* dump cs */
-printf("%d pm4\n", ctx->id);
-       blob = bof_blob(ctx->id * 4, ctx->pm4);
+       blob = bof_blob(ctx->cdwords * 4, ctx->pm4);
         if (blob == NULL)
                 goto out_err;
         if (bof_object_set(root, "pm4", blob))
@@ -363,23 +333,24 @@ printf("%d pm4\n", ctx->id);
                 bo = bof_object();
                 if (bo == NULL)
                         goto out_err;
-               size = bof_int32(ctx->bo[i].bo->size);
+               bo_size = ctx->bo[i]->size;
+               size = bof_int32(bo_size);
                 if (size == NULL)
                         goto out_err;
                 if (bof_object_set(bo, "size", size))
                         goto out_err;
                 bof_decref(size);
                 size = NULL;
-               handle = bof_int32(ctx->bo[i].bo->handle);
+               handle = bof_int32(ctx->bo[i]->handle);
                 if (handle == NULL)
                         goto out_err;
                 if (bof_object_set(bo, "handle", handle))
                         goto out_err;
                 bof_decref(handle);
                 handle = NULL;
-               radeon_bo_map(ctx->radeon, ctx->bo[i].bo);
-               blob = bof_blob(ctx->bo[i].bo->size, ctx->bo[i].bo->data);
-               radeon_bo_unmap(ctx->radeon, ctx->bo[i].bo);
+               radeon_bo_map(ctx->radeon, ctx->bo[i]);
+               blob = bof_blob(bo_size, ctx->bo[i]->data);
+               radeon_bo_unmap(ctx->radeon, ctx->bo[i]);
                 if (blob == NULL)
                         goto out_err;
                 if (bof_object_set(bo, "data", blob))
@@ -394,7 +365,6 @@ printf("%d pm4\n", ctx->id);
         if (bof_object_set(root, "bo", array))
                 goto out_err;
         bof_dump_file(root, file);
-printf("done dump\n");
  out_err:
         bof_decref(blob);
         bof_decref(array);