r600g: rework queries
author Marek Olšák <maraeo@gmail.com>
Tue, 21 Feb 2012 23:25:55 +0000 (00:25 +0100)
committer Marek Olšák <maraeo@gmail.com>
Thu, 23 Feb 2012 19:29:56 +0000 (20:29 +0100)
We always mapped the query buffer in begin_query, causing stalls
if the buffer was busy.

This commit reworks it such that the query buffer is only mapped
in get_query_result as it's supposed to be.

The query buffer is no longer treated as a ring buffer. Instead, the results
are just appended and when the buffer is full, we create a new one. One query
can have more than one query buffer, though that's a very rare case.
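Begin_query discards the old results: it releases all previous query buffers
and replaces the current one only if it cannot be mapped without a stall.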

Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/r600/r600.h
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_query.c

index deba53f6ac83ff00b06e018b9bbb3bc93e2fcced..1db9b283dfaf0dd6d13cfbaa248ddcf6d4ac62c6 100644 (file)
@@ -157,25 +157,31 @@ struct r600_range {
        struct r600_block       **blocks;
 };
 
-struct r600_query {
-       union {
-               uint64_t                        u64;
-               boolean                         b;
-               struct pipe_query_data_so_statistics so;
-       } result;
-       /* The kind of query */
-       unsigned                                type;
-       /* Offset of the first result for current query */
-       unsigned                                results_start;
+struct r600_query_buffer {
+       /* The buffer where query results are stored. */
+       struct r600_resource                    *buf;
        /* Offset of the next free result after current query data */
        unsigned                                results_end;
+       /* If a query buffer is full, a new buffer is created and the old one
+        * is put in here. When we calculate the result, we sum up the samples
+        * from all buffers. */
+       struct r600_query_buffer                *previous;
+};
+
+union r600_query_result {
+       uint64_t                        u64;
+       boolean                         b;
+       struct pipe_query_data_so_statistics so;
+};
+
+struct r600_query {
+       /* The query buffer and how many results are in it. */
+       struct r600_query_buffer                buffer;
+       /* The type of query */
+       unsigned                                type;
        /* Size of the result in memory for both begin_query and end_query,
         * this can be one or two numbers, or it could even be a size of a structure. */
        unsigned                                result_size;
-       /* The buffer where query results are stored. It's used as a ring,
-        * data blocks for current query are stored sequentially from
-        * results_start to results_end, with wrapping on the buffer end */
-       struct r600_resource                    *buffer;
        /* The number of dwords for begin_query or end_query. */
        unsigned                                num_cs_dw;
        /* linked list of queries */
@@ -214,6 +220,7 @@ void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *que
 boolean r600_context_query_result(struct r600_context *ctx,
                                struct r600_query *query,
                                boolean wait, void *vresult);
+struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, unsigned type);
 void r600_query_begin(struct r600_context *ctx, struct r600_query *query);
 void r600_query_end(struct r600_context *ctx, struct r600_query *query);
 void r600_context_queries_suspend(struct r600_context *ctx);
index c1fa9db08a0a668a7de4be4d2ea2badff836fa4b..8c2e1837ff41ae0ebe3b44f8d7d793a144c400ec 100644 (file)
@@ -1349,12 +1349,16 @@ static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned
        return 0;
 }
 
-static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
+static boolean r600_query_buffer_result(struct r600_context *ctx,
+                                       struct r600_query *query,
+                                       struct r600_query_buffer *qbuf,
+                                       boolean wait,
+                                       union r600_query_result *result)
 {
-       unsigned results_base = query->results_start;
+       unsigned results_base = 0;
        char *map;
 
-       map = ctx->ws->buffer_map(query->buffer->buf, ctx->cs,
+       map = ctx->ws->buffer_map(qbuf->buf->buf, ctx->cs,
                                  PIPE_TRANSFER_READ |
                                  (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
        if (!map)
@@ -1363,24 +1367,24 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu
        /* count all results across all data blocks */
        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
-               while (results_base != query->results_end) {
-                       query->result.u64 +=
+               while (results_base != qbuf->results_end) {
+                       result->u64 +=
                                r600_query_read_result(map + results_base, 0, 2, true);
-                       results_base = (results_base + 16) % query->buffer->b.b.b.width0;
+                       results_base += 16;
                }
                break;
        case PIPE_QUERY_OCCLUSION_PREDICATE:
-               while (results_base != query->results_end) {
-                       query->result.b = query->result.b ||
+               while (results_base != qbuf->results_end) {
+                       result->b = result->b ||
                                r600_query_read_result(map + results_base, 0, 2, true) != 0;
-                       results_base = (results_base + 16) % query->buffer->b.b.b.width0;
+                       results_base += 16;
                }
                break;
        case PIPE_QUERY_TIME_ELAPSED:
-               while (results_base != query->results_end) {
-                       query->result.u64 +=
+               while (results_base != qbuf->results_end) {
+                       result->u64 +=
                                r600_query_read_result(map + results_base, 0, 2, false);
-                       results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
+                       results_base += query->result_size;
                }
                break;
        case PIPE_QUERY_PRIMITIVES_EMITTED:
@@ -1390,79 +1394,84 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu
                 *    u64 PrimitiveStorageNeeded;
                 * }
                 * We only need NumPrimitivesWritten here. */
-               while (results_base != query->results_end) {
-                       query->result.u64 +=
+               while (results_base != qbuf->results_end) {
+                       result->u64 +=
                                r600_query_read_result(map + results_base, 2, 6, true);
-                       results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
+                       results_base += query->result_size;
                }
                break;
        case PIPE_QUERY_PRIMITIVES_GENERATED:
                /* Here we read PrimitiveStorageNeeded. */
-               while (results_base != query->results_end) {
-                       query->result.u64 +=
+               while (results_base != qbuf->results_end) {
+                       result->u64 +=
                                r600_query_read_result(map + results_base, 0, 4, true);
-                       results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
+                       results_base += query->result_size;
                }
                break;
        case PIPE_QUERY_SO_STATISTICS:
-               while (results_base != query->results_end) {
-                       query->result.so.num_primitives_written +=
+               while (results_base != qbuf->results_end) {
+                       result->so.num_primitives_written +=
                                r600_query_read_result(map + results_base, 2, 6, true);
-                       query->result.so.primitives_storage_needed +=
+                       result->so.primitives_storage_needed +=
                                r600_query_read_result(map + results_base, 0, 4, true);
-                       results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
+                       results_base += query->result_size;
                }
                break;
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-               while (results_base != query->results_end) {
-                       query->result.b = query->result.b ||
+               while (results_base != qbuf->results_end) {
+                       result->b = result->b ||
                                r600_query_read_result(map + results_base, 2, 6, true) !=
                                r600_query_read_result(map + results_base, 0, 4, true);
-                       results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
+                       results_base += query->result_size;
                }
                break;
        default:
                assert(0);
        }
 
-       query->results_start = query->results_end;
-       ctx->ws->buffer_unmap(query->buffer->buf);
+       ctx->ws->buffer_unmap(qbuf->buf->buf);
        return TRUE;
 }
 
-void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
+static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait,
+                                union r600_query_result *result)
 {
-       struct radeon_winsys_cs *cs = ctx->cs;
-       unsigned new_results_end, i;
-       uint32_t *results;
-       uint64_t va;
-
-       r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
-
-       new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.b.width0;
+       struct r600_query_buffer *qbuf;
 
-       /* collect current results if query buffer is full */
-       if (new_results_end == query->results_start) {
-               r600_query_result(ctx, query, TRUE);
+       for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+               if (!r600_query_buffer_result(ctx, query, qbuf, wait, result)) {
+                       return FALSE;
+               }
        }
+       return TRUE;
+}
 
-       switch (query->type) {
+struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, unsigned type)
+{
+       unsigned j, i, num_results, buf_size = 4096;
+       uint32_t *results;
+       struct r600_resource *buf = (struct r600_resource*)
+               pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM,
+                                  PIPE_USAGE_STAGING, buf_size);
+
+       switch (type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
-               results = ctx->ws->buffer_map(query->buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE);
-               if (results) {
-                       results = (uint32_t*)((char*)results + query->results_end);
-                       memset(results, 0, query->result_size);
+               results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE);
+               memset(results, 0, buf_size);
 
-                       /* Set top bits for unused backends */
+               /* Set top bits for unused backends. */
+               num_results = buf_size / (16 * ctx->max_db);
+               for (j = 0; j < num_results; j++) {
                        for (i = 0; i < ctx->max_db; i++) {
                                if (!(ctx->backend_mask & (1<<i))) {
                                        results[(i * 4)+1] = 0x80000000;
                                        results[(i * 4)+3] = 0x80000000;
                                }
                        }
-                       ctx->ws->buffer_unmap(query->buffer->buf);
+                       results += 4 * ctx->max_db;
                }
+               ctx->ws->buffer_unmap(buf->buf);
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                break;
@@ -1470,18 +1479,35 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
        case PIPE_QUERY_PRIMITIVES_GENERATED:
        case PIPE_QUERY_SO_STATISTICS:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-               results = ctx->ws->buffer_map(query->buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE);
-               results = (uint32_t*)((char*)results + query->results_end);
-               memset(results, 0, query->result_size);
-               ctx->ws->buffer_unmap(query->buffer->buf);
+               results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE);
+               memset(results, 0, buf_size);
+               ctx->ws->buffer_unmap(buf->buf);
                break;
        default:
                assert(0);
        }
+       return buf;
+}
+
+void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
+{
+       struct radeon_winsys_cs *cs = ctx->cs;
+       uint64_t va;
+
+       r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
+
+       /* Get a new query buffer if needed. */
+       if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.b.width0) {
+               struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
+               *qbuf = query->buffer;
+               query->buffer.buf = r600_new_query_buffer(ctx, query->type);
+               query->buffer.results_end = 0;
+               query->buffer.previous = qbuf;
+       }
 
        /* emit begin query */
-       va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
-       va += query->results_end;
+       va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
+       va += query->buffer.results_end;
 
        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -1497,8 +1523,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
-               cs->buf[cs->cdw++] = query->results_end;
-               cs->buf[cs->cdw++] = 0;
+               cs->buf[cs->cdw++] = va;
+               cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
@@ -1512,7 +1538,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
                assert(0);
        }
        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
+       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);
 
        ctx->num_cs_dw_queries_suspend += query->num_cs_dw;
 }
@@ -1522,12 +1548,12 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
        struct radeon_winsys_cs *cs = ctx->cs;
        uint64_t va;
 
-       va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+       va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
        /* emit end query */
        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
-               va += query->results_end + 8;
+               va += query->buffer.results_end + 8;
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
                cs->buf[cs->cdw++] = va;
@@ -1539,11 +1565,11 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
-               cs->buf[cs->cdw++] = query->results_end + query->result_size/2;
+               cs->buf[cs->cdw++] = query->buffer.results_end + query->result_size/2;
                cs->buf[cs->cdw++] = 0;
                break;
        case PIPE_QUERY_TIME_ELAPSED:
-               va += query->results_end + query->result_size/2;
+               va += query->buffer.results_end + query->result_size/2;
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
                cs->buf[cs->cdw++] = va;
@@ -1555,9 +1581,9 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
                assert(0);
        }
        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
+       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);
 
-       query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.b.width0;
+       query->buffer.results_end += query->result_size;
        ctx->num_cs_dw_queries_suspend -= query->num_cs_dw;
 }
 
@@ -1565,7 +1591,6 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
                            int flag_wait)
 {
        struct radeon_winsys_cs *cs = ctx->cs;
-       uint64_t va;
 
        if (operation == PREDICATION_OP_CLEAR) {
                r600_need_cs_space(ctx, 3, FALSE);
@@ -1574,40 +1599,44 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
                cs->buf[cs->cdw++] = 0;
                cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
        } else {
-               unsigned results_base = query->results_start;
+               struct r600_query_buffer *qbuf;
                unsigned count;
                uint32_t op;
 
-               /* find count of the query data blocks */
-               count = (query->buffer->b.b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.b.width0;
-               count /= query->result_size;
+               /* Find how many results there are. */
+               count = 0;
+               for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+                       count += qbuf->results_end / query->result_size;
+               }
 
                r600_need_cs_space(ctx, 5 * count, TRUE);
 
                op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
                                (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
-               va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
 
                /* emit predicate packets for all data blocks */
-               while (results_base != query->results_end) {
-                       cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
-                       cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
-                       cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
-                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer,
-                                                                            RADEON_USAGE_READ);
-                       results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
+               for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+                       unsigned results_base = 0;
+                       uint64_t va = r600_resource_va(&ctx->screen->screen, &qbuf->buf->b.b.b);
+
+                       while (results_base < qbuf->results_end) {
+                               cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
+                               cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
+                               cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
+                               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+                               cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, qbuf->buf, RADEON_USAGE_READ);
+                               results_base += query->result_size;
 
-                       /* set CONTINUE bit for all packets except the first */
-                       op |= PREDICATION_CONTINUE;
-               }
+                               /* set CONTINUE bit for all packets except the first */
+                               op |= PREDICATION_CONTINUE;
+                       }
+               } while (qbuf);
        }
 }
 
 struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
 {
        struct r600_query *query;
-       unsigned buffer_size = 4096;
 
        query = CALLOC_STRUCT(r600_query);
        if (query == NULL)
@@ -1639,16 +1668,12 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned
                return NULL;
        }
 
-       /* adjust buffer size to simplify offsets wrapping math */
-       buffer_size -= buffer_size % query->result_size;
-
        /* Queries are normally read by the CPU after
         * being written by the gpu, hence staging is probably a good
         * usage pattern.
         */
-       query->buffer = (struct r600_resource*)
-               pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, buffer_size);
-       if (!query->buffer) {
+       query->buffer.buf = r600_new_query_buffer(ctx, query_type);
+       if (!query->buffer.buf) {
                FREE(query);
                return NULL;
        }
@@ -1657,7 +1682,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned
 
 void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
 {
-       pipe_resource_reference((struct pipe_resource**)&query->buffer, NULL);
+       pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
        free(query);
 }
 
@@ -1667,27 +1692,30 @@ boolean r600_context_query_result(struct r600_context *ctx,
 {
        boolean *result_b = (boolean*)vresult;
        uint64_t *result_u64 = (uint64_t*)vresult;
+       union r600_query_result result;
        struct pipe_query_data_so_statistics *result_so =
                (struct pipe_query_data_so_statistics*)vresult;
 
-       if (!r600_query_result(ctx, query, wait))
+       memset(&result, 0, sizeof(result));
+
+       if (!r600_query_result(ctx, query, wait, &result))
                return FALSE;
 
        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_PRIMITIVES_EMITTED:
        case PIPE_QUERY_PRIMITIVES_GENERATED:
-               *result_u64 = query->result.u64;
+               *result_u64 = result.u64;
                break;
        case PIPE_QUERY_OCCLUSION_PREDICATE:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-               *result_b = query->result.b;
+               *result_b = result.b;
                break;
        case PIPE_QUERY_TIME_ELAPSED:
-               *result_u64 = (1000000 * query->result.u64) / ctx->screen->info.r600_clock_crystal_freq;
+               *result_u64 = (1000000 * result.u64) / ctx->screen->info.r600_clock_crystal_freq;
                break;
        case PIPE_QUERY_SO_STATISTICS:
-               *result_so = query->result.so;
+               *result_so = result.so;
                break;
        default:
                assert(0);
index f2e6d01065935cd6a2947eee689082cbf4680a94..cf026abf1fbf1f3e5ff50145b7dbf6a2f6a1d1ab 100644 (file)
@@ -22,6 +22,7 @@
  */
 #include "r600_pipe.h"
 #include "r600d.h"
+#include "util/u_memory.h"
 
 static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
 {
@@ -56,6 +57,30 @@ static void r600_update_occlusion_query_state(struct r600_context *rctx,
        }
 }
 
+static void r600_query_discard_results(struct r600_context *rctx,
+                                      struct r600_query *query)
+{
+       /* Discard the old query buffers. */
+       struct r600_query_buffer *prev = query->buffer.previous;
+
+       while (prev) {
+               struct r600_query_buffer *qbuf = prev;
+               prev = prev->previous;
+               pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
+               FREE(qbuf);
+       }
+
+       /* Obtain a new buffer if the current one can't be mapped without a stall. */
+       if (rctx->ws->cs_is_buffer_referenced(rctx->cs, query->buffer.buf->cs_buf) ||
+           rctx->ws->buffer_is_busy(query->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
+               pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
+               query->buffer.buf = r600_new_query_buffer(rctx, query->type);
+       }
+
+       query->buffer.results_end = 0;
+       query->buffer.previous = NULL;
+}
+
 static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
@@ -63,9 +88,8 @@ static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
 
        r600_update_occlusion_query_state(rctx, rquery->type, 1);
 
-       memset(&rquery->result, 0, sizeof(rquery->result));
-       rquery->results_start = rquery->results_end;
-       r600_query_begin(rctx, (struct r600_query *)query);
+       r600_query_discard_results(rctx, rquery);
+       r600_query_begin(rctx, rquery);
        LIST_ADDTAIL(&rquery->list, &rctx->active_query_list);
 }
 
@@ -98,14 +122,6 @@ static void r600_render_condition(struct pipe_context *ctx,
        struct r600_query *rquery = (struct r600_query *)query;
        int wait_flag = 0;
 
-       /* If we already have nonzero result, render unconditionally */
-       if (query != NULL && rquery->result.u64 != 0) {
-               if (rctx->current_render_cond) {
-                       r600_render_condition(ctx, NULL, 0);
-               }
-               return;
-       }
-
        rctx->current_render_cond = query;
        rctx->current_render_cond_mode = mode;