- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
- struct si_qbo_state saved_state = {};
- struct pipe_resource *tmp_buffer = NULL;
- unsigned tmp_buffer_offset = 0;
-
- if (!sctx->sh_query_result_shader) {
- sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx);
- if (!sctx->sh_query_result_shader)
- return;
- }
-
- if (query->first != query->last) {
- u_suballocator_alloc(sctx->allocator_zeroed_memory, 16, 16,
- &tmp_buffer_offset, &tmp_buffer);
- if (!tmp_buffer)
- return;
- }
-
- si_save_qbo_state(sctx, &saved_state);
-
- /* Pre-fill the constants configuring the shader behavior. */
- struct {
- uint32_t config;
- uint32_t offset;
- uint32_t chain;
- uint32_t result_count;
- } consts;
- struct pipe_constant_buffer constant_buffer = {};
-
- if (index >= 0) {
- switch (query->b.type) {
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- consts.offset = sizeof(uint32_t) * query->stream;
- consts.config = 0;
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- consts.offset = sizeof(uint32_t) * (4 + query->stream);
- consts.config = 0;
- break;
- case PIPE_QUERY_SO_STATISTICS:
- consts.offset = sizeof(uint32_t) * (4 * index + query->stream);
- consts.config = 0;
- break;
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- consts.offset = sizeof(uint32_t) * query->stream;
- consts.config = 2;
- break;
- case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
- consts.offset = 0;
- consts.config = 3;
- break;
- default: unreachable("bad query type");
- }
- } else {
- /* Check result availability. */
- consts.offset = 0;
- consts.config = 1;
- }
-
- if (result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64)
- consts.config |= 8;
-
- constant_buffer.buffer_size = sizeof(consts);
- constant_buffer.user_buffer = &consts;
-
- /* Pre-fill the SSBOs and grid. */
- struct pipe_shader_buffer ssbo[3];
- struct pipe_grid_info grid = {};
-
- ssbo[1].buffer = tmp_buffer;
- ssbo[1].buffer_offset = tmp_buffer_offset;
- ssbo[1].buffer_size = 16;
-
- ssbo[2] = ssbo[1];
-
- sctx->b.bind_compute_state(&sctx->b, sctx->sh_query_result_shader);
-
- grid.block[0] = 1;
- grid.block[1] = 1;
- grid.block[2] = 1;
- grid.grid[0] = 1;
- grid.grid[1] = 1;
- grid.grid[2] = 1;
-
- struct gfx10_sh_query_buffer *qbuf = query->first;
- for (;;) {
- unsigned begin = qbuf == query->first ? query->first_begin : 0;
- unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0;
- if (!end)
- continue;
-
- ssbo[0].buffer = &qbuf->buf->b.b;
- ssbo[0].buffer_offset = begin;
- ssbo[0].buffer_size = end - begin;
-
- consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);
- consts.chain = 0;
- if (qbuf != query->first)
- consts.chain |= 1;
- if (qbuf != query->last)
- consts.chain |= 2;
-
- if (qbuf == query->last) {
- ssbo[2].buffer = resource;
- ssbo[2].buffer_offset = offset;
- ssbo[2].buffer_size = 8;
- }
-
- sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
- sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo, 0x6);
-
- if (wait) {
- uint64_t va;
-
- /* Wait for result availability. Wait only for readiness
- * of the last entry, since the fence writes should be
- * serialized in the CP.
- */
- va = qbuf->buf->gpu_address;
- va += end - sizeof(struct gfx10_sh_query_buffer_mem);
- va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
-
- si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
- }
-
- sctx->b.launch_grid(&sctx->b, &grid);
- sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
-
- if (qbuf == query->last)
- break;
- qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);
- }
-
- si_restore_qbo_state(sctx, &saved_state);
- pipe_resource_reference(&tmp_buffer, NULL);
+ struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
+ struct si_qbo_state saved_state = {};
+ struct pipe_resource *tmp_buffer = NULL;
+ unsigned tmp_buffer_offset = 0;
+
+ if (!sctx->sh_query_result_shader) {
+ sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx);
+ if (!sctx->sh_query_result_shader)
+ return;
+ }
+
+ if (query->first != query->last) {
+ u_suballocator_alloc(sctx->allocator_zeroed_memory, 16, 16, &tmp_buffer_offset, &tmp_buffer);
+ if (!tmp_buffer)
+ return;
+ }
+
+ si_save_qbo_state(sctx, &saved_state);
+
+ /* Pre-fill the constants configuring the shader behavior. */
+ struct {
+ uint32_t config;
+ uint32_t offset;
+ uint32_t chain;
+ uint32_t result_count;
+ } consts;
+ struct pipe_constant_buffer constant_buffer = {};
+
+ if (index >= 0) {
+ switch (query->b.type) {
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ consts.offset = sizeof(uint32_t) * query->stream;
+ consts.config = 0;
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ consts.offset = sizeof(uint32_t) * (4 + query->stream);
+ consts.config = 0;
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ consts.offset = sizeof(uint32_t) * (4 * index + query->stream);
+ consts.config = 0;
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ consts.offset = sizeof(uint32_t) * query->stream;
+ consts.config = 2;
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ consts.offset = 0;
+ consts.config = 3;
+ break;
+ default:
+ unreachable("bad query type");
+ }
+ } else {
+ /* Check result availability. */
+ consts.offset = 0;
+ consts.config = 1;
+ }
+
+ if (result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64)
+ consts.config |= 8;
+
+ constant_buffer.buffer_size = sizeof(consts);
+ constant_buffer.user_buffer = &consts;
+
+ /* Pre-fill the SSBOs and grid. */
+ struct pipe_shader_buffer ssbo[3];
+ struct pipe_grid_info grid = {};
+
+ ssbo[1].buffer = tmp_buffer;
+ ssbo[1].buffer_offset = tmp_buffer_offset;
+ ssbo[1].buffer_size = 16;
+
+ ssbo[2] = ssbo[1];
+
+ sctx->b.bind_compute_state(&sctx->b, sctx->sh_query_result_shader);
+
+ grid.block[0] = 1;
+ grid.block[1] = 1;
+ grid.block[2] = 1;
+ grid.grid[0] = 1;
+ grid.grid[1] = 1;
+ grid.grid[2] = 1;
+
+ struct gfx10_sh_query_buffer *qbuf = query->first;
+ for (;;) {
+ unsigned begin = qbuf == query->first ? query->first_begin : 0;
+ unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0;
+ if (!end)
+ continue;
+
+ ssbo[0].buffer = &qbuf->buf->b.b;
+ ssbo[0].buffer_offset = begin;
+ ssbo[0].buffer_size = end - begin;
+
+ consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);
+ consts.chain = 0;
+ if (qbuf != query->first)
+ consts.chain |= 1;
+ if (qbuf != query->last)
+ consts.chain |= 2;
+
+ if (qbuf == query->last) {
+ ssbo[2].buffer = resource;
+ ssbo[2].buffer_offset = offset;
+ ssbo[2].buffer_size = 8;
+ }
+
+ sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
+ sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo, 0x6);
+
+ if (wait) {
+ uint64_t va;
+
+ /* Wait for result availability. Wait only for readiness
+ * of the last entry, since the fence writes should be
+ * serialized in the CP.
+ */
+ va = qbuf->buf->gpu_address;
+ va += end - sizeof(struct gfx10_sh_query_buffer_mem);
+ va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
+
+ si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
+ }
+
+ sctx->b.launch_grid(&sctx->b, &grid);
+ sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
+
+ if (qbuf == query->last)
+ break;
+ qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);
+ }
+
+ si_restore_qbo_state(sctx, &saved_state);
+ pipe_resource_reference(&tmp_buffer, NULL);