static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
{
+ unsigned results_base = query->results_start;
u64 start, end;
- u32 *results;
- int i;
- int size;
+ u32 *results, *current_result;
if (wait)
results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_CPU_READ, NULL);
else
results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_READ, NULL);
if (!results)
return FALSE;
- /* query->num_results contains how many dwords were used for the query */
- size = query->num_results;
- for (i = 0; i < size; i += 4) {
- start = (u64)results[i] | (u64)results[i + 1] << 32;
- end = (u64)results[i + 2] | (u64)results[i + 3] << 32;
+
+ /* accumulate the results of all (start, end) pairs stored between results_start and results_end */
+ while (results_base != query->results_end) {
+ current_result = (u32*)((char*)results + results_base);
+
+ start = (u64)current_result[0] | (u64)current_result[1] << 32;
+ end = (u64)current_result[2] | (u64)current_result[3] << 32;
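+
+ /* bit 63 of each value is set by the GPU once the result has been
+ * written; only count pairs whose both halves have landed. Timer
+ * results are taken unconditionally. */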
if (((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))
|| query->type == PIPE_QUERY_TIME_ELAPSED) {
query->result += end - start;
}
+
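+ /* advance by one 4-dword (start, end) pair and wrap at the end of the ring */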
+ results_base += 4 * 4;
+ if (results_base >= query->buffer_size)
+ results_base = 0;
}
- r600_bo_unmap(ctx->radeon, query->buffer);
- query->num_results = 0;
+ query->results_start = query->results_end;
+ r600_bo_unmap(ctx->radeon, query->buffer);
return TRUE;
}
void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
{
- unsigned required_space, required_buffer;
+ unsigned required_space, new_results_end;
int num_backends = r600_get_num_backends(ctx->radeon);
/* query request needs 6/8 dwords for begin + 6/8 dwords for end */
r600_context_flush(ctx);
}
- required_buffer = query->num_results +
- 4 * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->max_db : 1);
+ /* a new occlusion query is starting (results_start == results_end means there are no unread results, i.e. this is not a resume) */
+ if (query->type == PIPE_QUERY_OCCLUSION_COUNTER &&
+ query->results_start == query->results_end) {
+ /* Count the queries emitted since the last flush, and flush once
+ * more than half of the buffer has been used, so we never
+ * overwrite results that may still be in use. */
+ if (query->state & R600_QUERY_STATE_FLUSHED) {
+ query->queries_emitted = 1;
+ } else {
+ if (++query->queries_emitted > query->buffer_size / query->result_size / 2)
+ r600_context_flush(ctx);
+ }
+ }
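+ /* e.g. a 4096-byte buffer with 64-byte results flushes after 32 queries emitted without an intervening flush */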
+
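+ /* compute where results_end will point after this query ends; the wrap below must mirror the one in r600_query_end() */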
+ new_results_end = query->results_end + query->result_size;
+ if (new_results_end >= query->buffer_size)
+ new_results_end = 0;
- /* if query buffer is full force a flush */
- if (required_buffer*4 > query->buffer_size) {
+ /* if the ring is full (the next block would overwrite unread results), flush and collect the pending results now */
+ if (new_results_end == query->results_start) {
if (!(query->state & R600_QUERY_STATE_FLUSHED))
r600_context_flush(ctx);
r600_query_result(ctx, query, TRUE);
}
- if (query->type == PIPE_QUERY_OCCLUSION_COUNTER &&
- num_backends > 0) {
- /* as per info on ZPASS the driver must set the unused DB top bits */
+ if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
u32 *results;
int i;
results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_CPU_WRITE, NULL);
if (results) {
- memset(results + query->num_results, 0, ctx->max_db * 4 * 4);
+ results = (u32*)((char*)results + query->results_end);
+ memset(results, 0, query->result_size);
+ /* ZPASS_DONE only writes results for the enabled backends; set the
+ * top bit for the unused ones so r600_query_result() treats them
+ * as written, contributing zero */
for (i = num_backends; i < ctx->max_db; i++) {
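+ /* (i * 4) + 1 and (i * 4) + 3 are the high dwords of DB i's start and end values */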
results[(i * 4)+1] = 0x80000000;
results[(i * 4)+3] = 0x80000000;
if (query->type == PIPE_QUERY_TIME_ELAPSED) {
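/* EVENT_WRITE_EOP writes the payload once all prior work has completed;
 * DATA_SEL = 3 (the "3 << 29" below) selects a 64-bit GPU timestamp */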
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
- ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer);
ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = 0;
} else {
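/* occlusion query: ZPASS_DONE makes every enabled DB write its zpass
 * counter into its own 16-byte slot starting at the address below */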
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
- ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer);
ctx->pm4[ctx->pm4_cdwords++] = 0;
}
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
void r600_query_end(struct r600_context *ctx, struct r600_query *query)
{
- /* emit begin query */
+ /* emit end query */
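+ /* end values are written 8 bytes past the begin values, hence the "+ 8" in the addresses below */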
if (query->type == PIPE_QUERY_TIME_ELAPSED) {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
- ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer);
ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = 0;
} else {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
- ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer);
ctx->pm4[ctx->pm4_cdwords++] = 0;
}
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = 0;
r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
- query->num_results += 4 * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->max_db : 1);
+ query->results_end += query->result_size;
+ if (query->results_end >= query->buffer_size)
+ query->results_end = 0;
+
query->state ^= R600_QUERY_STATE_STARTED;
query->state |= R600_QUERY_STATE_ENDED;
query->state &= ~R600_QUERY_STATE_FLUSHED;
+
ctx->num_query_running--;
}
void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
int flag_wait)
{
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
-
if (operation == PREDICATION_OP_CLEAR) {
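+ /* make sure the 3-dword SET_PREDICATION packet fits in the command buffer */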
+ if (ctx->pm4_cdwords + 3 > ctx->pm4_ndwords)
+ r600_context_flush(ctx);
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(PREDICATION_OP_CLEAR);
} else {
- int results_base = query->num_results - (4 * ctx->max_db);
+ unsigned results_base = query->results_start;
+ unsigned count;
+ u32 op;
- if (results_base < 0)
- results_base = 0;
+ /* compute the number of data blocks stored in the ring buffer;
+ * results_end == results_start means the ring is empty */
+ count = query->buffer_size + query->results_end - query->results_start;
+ if (count >= query->buffer_size)
+ count -= query->buffer_size;
+ count /= query->result_size;
- ctx->pm4[ctx->pm4_cdwords++] = results_base*4 + r600_bo_offset(query->buffer);
- ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(operation) | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW) | PREDICATION_DRAW_VISIBLE;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
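+ /* each block emits SET_PREDICATION (3 dwords) plus a NOP relocation (2 dwords) */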
+ if (ctx->pm4_cdwords + 5 * count > ctx->pm4_ndwords)
+ r600_context_flush(ctx);
+
+ op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
+ (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
+
+ /* emit predicate packets for all data blocks */
+ while (results_base != query->results_end) {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = results_base + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = op;
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+ results_base += query->result_size;
+ if (results_base >= query->buffer_size)
+ results_base = 0;
+ /* all packets after the first must set the CONTINUE bit, so the hardware combines the per-block results into a single predicate */
+ op |= PREDICATION_CONTINUE;
+ }
}
}
query->type = query_type;
query->buffer_size = 4096;
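+ /* an occlusion result stores one 16-byte (start, end) pair per DB; every other query type stores a single pair */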
+ if (query_type == PIPE_QUERY_OCCLUSION_COUNTER)
+ query->result_size = 4 * 4 * ctx->max_db;
+ else
+ query->result_size = 4 * 4;
+
+ /* round the buffer size down to a multiple of result_size so blocks never straddle the wrap point */
+ query->buffer_size -= query->buffer_size % query->result_size;
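+ /* e.g. with max_db = 4 the result_size is 64 bytes and the 4096-byte buffer holds 64 results before wrapping */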
+
/* As of GL4, query buffers are normally read by the CPU after
* being written by the gpu, hence staging is probably a good
* usage pattern.
{
uint64_t *result = (uint64_t*)vresult;
- if (query->num_results && !(query->state & R600_QUERY_STATE_FLUSHED)) {
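+ /* results can only be read back once the commands writing them have been submitted to the GPU */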
+ if (!(query->state & R600_QUERY_STATE_FLUSHED)) {
r600_context_flush(ctx);
}
if (!r600_query_result(ctx, query, wait))
struct r600_query *query;
LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) {
+ if (flushed)
+ query->state |= R600_QUERY_STATE_FLUSHED;
+
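/* restart suspended queries so they keep counting in the new command stream */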
if (query->state & R600_QUERY_STATE_SUSPENDED) {
r600_query_begin(ctx, query);
query->state ^= R600_QUERY_STATE_SUSPENDED;
- } else if (flushed && query->state==R600_QUERY_STATE_ENDED)
- query->state |= R600_QUERY_STATE_FLUSHED;
+ }
}
}