From ca35292a4456fec1c584d40bf9b4197fe733f609 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Tue, 21 Sep 2010 20:24:51 -0400
Subject: [PATCH] r600g: occlusion query for new design

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 src/gallium/drivers/r600/r600.h           |  38 +++++-
 src/gallium/drivers/r600/r600_state2.c    |  47 +++++++
 src/gallium/winsys/r600/drm/r600_priv.h   |   7 +-
 src/gallium/winsys/r600/drm/r600_state2.c | 151 +++++++++++++++++++++-
 4 files changed, 237 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 8879efca794..6d87220db2a 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -28,6 +28,8 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <util/u_double_list.h>
+#include <pipe/p_compiler.h>
 
 #define RADEON_CTX_MAX_PM4	(64 * 1024 / 4)
 
@@ -183,6 +185,9 @@ struct r600_group {
 	unsigned		*offset_block_id;
 };
 
+/*
+ * relocation
+ */
 #pragma pack(1)
 struct r600_reloc {
 	uint32_t	handle;
@@ -192,7 +197,29 @@ struct r600_reloc {
 };
 #pragma pack()
 
-struct radeon_bo;
+/*
+ * query: result holds the running sum of (end - start) pairs read back from buffer
+ */
+struct r600_query {
+	u64					result;
+	/* The kind of query. Only PIPE_QUERY_OCCLUSION_COUNTER can be created. */
+	unsigned				type;
+	/* Offset emitted in the begin/end packets for the next result pair;
+	 * advanced by 16 in r600_query_end, zeroed once results are read. */
+	unsigned				num_results;
+	/* bitmask of R600_QUERY_STATE_* flags */
+	unsigned				state;
+	/* The buffer where query results are stored. */
+	struct radeon_ws_bo			*buffer;
+	unsigned				buffer_size;
+	/* node in the owning context's query_list */
+	struct list_head			list;
+};
+
+#define R600_QUERY_STATE_STARTED	(1 << 0)
+#define R600_QUERY_STATE_ENDED		(1 << 1)
+#define R600_QUERY_STATE_SUSPENDED	(1 << 2)
+
 
 struct r600_context {
 	struct radeon		*radeon;
@@ -207,6 +234,7 @@ struct r600_context {
 	struct r600_reloc	*reloc;
 	struct radeon_bo	**bo;
 	u32			*pm4;
+	struct list_head	query_list;
 };
 
 struct r600_draw {
@@ -229,4 +257,12 @@ void r600_context_flush(struct r600_context *ctx);
 void r600_context_dump_bof(struct r600_context *ctx, const char *file);
 void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
 
+struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type);
+void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query);
+boolean r600_context_query_result(struct r600_context *ctx,
+				struct r600_query *query,
+				boolean wait, void *vresult);
+void r600_query_begin(struct r600_context *ctx, struct r600_query *query);
+void r600_query_end(struct r600_context *ctx, struct r600_query *query);
+
 #endif
diff --git a/src/gallium/drivers/r600/r600_state2.c b/src/gallium/drivers/r600/r600_state2.c
index f29aa0fdeaf..0343704a90c 100644
--- a/src/gallium/drivers/r600/r600_state2.c
+++ b/src/gallium/drivers/r600/r600_state2.c
@@ -2089,6 +2089,52 @@ static void r600_init_config2(struct r600_pipe_context *rctx)
 	r600_context_pipe_state_set(&rctx->ctx, rstate);
 }
 
+/* create_query callback: forwards to r600_context_query_create(). */
+static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
+{
+	struct r600_pipe_context *pctx = (struct r600_pipe_context *)ctx;
+	return (struct pipe_query *)r600_context_query_create(&pctx->ctx, query_type);
+}
+
+/* destroy_query callback: forwards to r600_context_query_destroy(). */
+static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+	struct r600_pipe_context *pctx = (struct r600_pipe_context *)ctx;
+	r600_context_query_destroy(&pctx->ctx, (struct r600_query *)query);
+}
+
+/* begin_query callback: casts the pipe context and query to their r600
+ * types, then calls r600_query_begin(). */
+static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+	r600_query_begin(&((struct r600_pipe_context *)ctx)->ctx, (struct r600_query *)query);
+}
+
+/* end_query callback: casts the pipe context and query to their r600
+ * types, then calls r600_query_end(). */
+static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+	r600_query_end(&((struct r600_pipe_context *)ctx)->ctx, (struct r600_query *)query);
+}
+
+/* get_query_result callback: forwards to r600_context_query_result() and
+ * hands back its return value unchanged. */
+static boolean r600_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
+					boolean wait, void *vresult)
+{
+	struct r600_pipe_context *pctx = (struct r600_pipe_context *)ctx;
+	return r600_context_query_result(&pctx->ctx, (struct r600_query *)query, wait, vresult);
+}
+
+static void r600_init_query_functions2(struct r600_pipe_context *rctx)
+{	/* Install the five query callbacks defined above on rctx->context. */
+	rctx->context.create_query = r600_create_query;
+	rctx->context.destroy_query = r600_destroy_query;
+	rctx->context.begin_query = r600_begin_query;
+	rctx->context.end_query = r600_end_query;
+	rctx->context.get_query_result = r600_get_query_result;
+}
+
 static struct pipe_context *r600_create_context2(struct pipe_screen *screen, void *priv)
 {
 	struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context);
@@ -2108,6 +2154,7 @@ static struct pipe_context *r600_create_context2(struct pipe_screen *screen, voi
 	rctx->radeon = rscreen->radeon;
 
 	r600_init_blit_functions2(rctx);
+	r600_init_query_functions2(rctx);
 	r600_init_state_functions2(rctx);
 	r600_init_context_resource_functions2(rctx);
 
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index d02562f17f3..6023f215bb6 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -45,12 +45,11 @@ struct radeon *r600_new(int fd, unsigned device);
 void r600_delete(struct radeon *r600);
 
 struct r600_reg {
-	unsigned		need_bo;
-	unsigned		flush_flags;
-	unsigned		offset;
+	unsigned			need_bo;
+	unsigned			flush_flags;
+	unsigned			offset;
 };
 
-
 /* radeon_pciid.c */
 unsigned radeon_family_from_device(unsigned device);
 
diff --git a/src/gallium/winsys/r600/drm/r600_state2.c b/src/gallium/winsys/r600/drm/r600_state2.c
index e32071b0e4e..d02a5a3895d 100644
--- a/src/gallium/winsys/r600/drm/r600_state2.c
+++ b/src/gallium/winsys/r600/drm/r600_state2.c
@@ -60,6 +60,10 @@ void radeon_bo_reference(struct radeon *radeon,
 
 unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *pb_bo);
 
+/* queries */
+static void r600_context_queries_suspend(struct r600_context *ctx);
+static void r600_context_queries_resume(struct r600_context *ctx);
+
 static int r600_group_id_register_offset(unsigned offset)
 {
 	if (offset >= R600_CONFIG_REG_OFFSET && offset < R600_CONFIG_REG_END) {
@@ -583,6 +587,7 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
 
 	memset(ctx, 0, sizeof(struct r600_context));
 	ctx->radeon = radeon;
+	LIST_INITHEAD(&ctx->query_list);
 	/* initialize groups */
 	r = r600_group_init(&ctx->groups[R600_GROUP_CONFIG], R600_CONFIG_REG_OFFSET, R600_CONFIG_REG_END);
 	if (r) {
@@ -902,7 +907,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 		R600_ERR("context is too big to be scheduled\n");
 		return;
 	}
-	/* Ok we enough room to copy packet */
+	/* enough room to copy packet */
 	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONFIG], PKT3_SET_CONFIG_REG);
 	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONTEXT], PKT3_SET_CONTEXT_REG);
 	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_ALU_CONST], PKT3_SET_ALU_CONST);
@@ -929,6 +934,8 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
 	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
+	/* all dirty state has been scheduled in the current cs */
+	ctx->pm4_dirty_cdwords = 0;
 }
 
 void r600_context_flush(struct r600_context *ctx)
@@ -942,6 +949,9 @@ void r600_context_flush(struct r600_context *ctx)
 	if (!ctx->pm4_cdwords)
 		return;
 
+	/* suspend queries */
+	r600_context_queries_suspend(ctx);
+
 #if 1
 	/* emit cs */
 	drmib.num_chunks = 2;
@@ -964,6 +974,13 @@ void r600_context_flush(struct r600_context *ctx)
 	ctx->creloc = 0;
 	ctx->pm4_dirty_cdwords = 0;
 	ctx->pm4_cdwords = 0;
+
+	/* resume queries */
+	r600_context_queries_resume(ctx);
+
+	/* set all valid groups as dirty so they get re-emitted on
+	 * the next draw command
+	 */
 	for (int i = 0; i < ctx->ngroups; i++) {
 		for (int j = 0; j < ctx->groups[i].nblocks; j++) {
 			/* mark enabled block as dirty */
@@ -1057,3 +1074,135 @@ out_err:
 	bof_decref(device_id);
 	bof_decref(root);
 }
+
+/* Add (end - start) to query->result for every 4-dword pair in the mapped
+ * buffer whose two values both have bit 63 set, then reset num_results.
+ * NOTE(review): r600_query_end bumps num_results by 16 per pair - confirm unit. */
+static void r600_query_result(struct r600_context *ctx, struct r600_query *query)
+{
+	const u64 top_bit = 0x8000000000000000UL;
+	u32 *dw = radeon_ws_bo_map(ctx->radeon, query->buffer, 0, NULL);
+	int idx;
+	for (idx = 0; idx < query->num_results; idx += 4) {
+		u64 begin_val = (u64)dw[idx] | (u64)dw[idx + 1] << 32;
+		u64 end_val = (u64)dw[idx + 2] | (u64)dw[idx + 3] << 32;
+		if ((begin_val & top_bit) && (end_val & top_bit))
+			query->result += end_val - begin_val;
+	}
+	radeon_ws_bo_unmap(ctx->radeon, query->buffer);
+	query->num_results = 0;
+}
+
+/* Emit the begin-of-query packet for query and flag the query as started. */
+void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
+{
+	/* query request needs 6 dwords for begin + 6 dwords for end; flush if they don't fit */
+	if ((12 + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
+		r600_context_flush(ctx);
+	}
+
+	/* if query buffer is full force a flush and fold the stored results into query->result */
+	if (query->num_results >= ((query->buffer_size >> 2) - 2)) {
+		r600_context_flush(ctx);
+		r600_query_result(ctx, query);
+	}
+
+	/* emit begin query */
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
+	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE;
+	ctx->pm4[ctx->pm4_cdwords++] = query->num_results;
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(query->buffer->pb));
+	/* ENDED must be cleared, not toggled: XOR would set it whenever it was clear */
+	query->state |= R600_QUERY_STATE_STARTED;
+	query->state &= ~R600_QUERY_STATE_ENDED;
+}
+
+/* Emit the end-of-query packet (offset 8 past the begin one), advance num_results, flag as ended. */
+void r600_query_end(struct r600_context *ctx, struct r600_query *query)
+{
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
+	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE;
+	ctx->pm4[ctx->pm4_cdwords++] = query->num_results + 8;
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(query->buffer->pb));
+	query->num_results += 16;
+	/* clear STARTED instead of toggling it, so an unmatched end cannot set it */
+	query->state &= ~R600_QUERY_STATE_STARTED;
+	query->state |= R600_QUERY_STATE_ENDED;
+}
+
+/* Allocate an occlusion query with a result buffer of size 4096 and append it
+ * to ctx->query_list. Returns NULL for any other query type or on failure. */
+struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
+{
+	struct r600_query *q = NULL;
+
+	/* only occlusion counters can be created */
+	if (query_type == PIPE_QUERY_OCCLUSION_COUNTER)
+		q = calloc(1, sizeof(*q));
+	if (q == NULL)
+		return NULL;
+
+	q->type = query_type;
+	q->buffer_size = 4096;
+	q->buffer = radeon_ws_bo(ctx->radeon, q->buffer_size, 1, 0);
+	if (q->buffer == NULL) {
+		/* no backing buffer: drop the half-built query */
+		free(q);
+		return NULL;
+	}
+
+	LIST_ADDTAIL(&q->list, &ctx->query_list);
+	return q;
+}
+
+void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
+{
+	radeon_ws_bo_reference(ctx->radeon, &query->buffer, NULL);	/* re-point buffer at NULL (presumably releasing it - confirm) */
+	LIST_DEL(&query->list);	/* unlink the node added by r600_context_query_create */
+	free(query);
+}
+
+/* Flush if results are outstanding, fold the buffer into query->result, then
+ * store that total through vresult and zero it. wait is not consulted. */
+boolean r600_context_query_result(struct r600_context *ctx, struct r600_query *query,
+				boolean wait, void *vresult)
+{
+	uint64_t *out = (uint64_t *)vresult;
+
+	if (query->num_results != 0)
+		r600_context_flush(ctx);
+	r600_query_result(ctx, query);
+	*out = query->result;
+	query->result = 0;
+	return TRUE;
+}
+
+/* End every started query on the context's list and flag it as suspended. */
+static void r600_context_queries_suspend(struct r600_context *ctx)
+{
+	struct r600_query *q;
+	LIST_FOR_EACH_ENTRY(q, &ctx->query_list, list) {
+		if (!(q->state & R600_QUERY_STATE_STARTED))
+			continue;
+		r600_query_end(ctx, q);
+		q->state |= R600_QUERY_STATE_SUSPENDED;
+	}
+}
+
+/* Begin again every query flagged as suspended and toggle the flag back off. */
+static void r600_context_queries_resume(struct r600_context *ctx)
+{
+	struct r600_query *q;
+	LIST_FOR_EACH_ENTRY(q, &ctx->query_list, list) {
+		if (!(q->state & R600_QUERY_STATE_SUSPENDED))
+			continue;
+		r600_query_begin(ctx, q);
+		q->state ^= R600_QUERY_STATE_SUSPENDED;
+	}
+}
-- 
2.30.2