From: Mike Blumenkrantz
Date: Mon, 1 Jun 2020 18:59:15 +0000 (-0400)
Subject: zink: implement transform feedback support to finish off opengl 3.0
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=37778fcd9a352430af0cd3b28a8776479a7c8380;p=mesa.git

zink: implement transform feedback support to finish off opengl 3.0

this adds:
* context hooks for gallium stream output methods
* handling for xfb-related queries
* barrier management for pausing and resuming xfb

loosely based on patches originally written by Dave Airlie

Reviewed-by: Erik Faye-Lund
Part-of:
---

diff --git a/src/gallium/drivers/zink/zink_blit.c b/src/gallium/drivers/zink/zink_blit.c
index 2aeb2247833..74e1024b616 100644
--- a/src/gallium/drivers/zink/zink_blit.c
+++ b/src/gallium/drivers/zink/zink_blit.c
@@ -208,6 +208,7 @@ zink_blit(struct pipe_context *pctx,
    util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, ctx->ubos[PIPE_SHADER_FRAGMENT]);
    util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->buffers);
    util_blitter_save_sample_mask(ctx->blitter, ctx->gfx_pipeline_state.sample_mask);
+   util_blitter_save_so_targets(ctx->blitter, ctx->num_so_targets, ctx->so_targets);
 
    util_blitter_blit(ctx->blitter, info);
 }
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index 465056f85cb..8ffdcc4607c 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -21,6 +21,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "zink_context.h"
 #include "zink_compiler.h"
 #include "zink_program.h"
 #include "zink_screen.h"
diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h
index 73bdebec9dd..abc1fbc44f6 100644
--- a/src/gallium/drivers/zink/zink_compiler.h
+++ b/src/gallium/drivers/zink/zink_compiler.h
@@ -55,6 +55,8 @@ struct zink_shader {
 
    shader_info info;
 
+   struct pipe_stream_output_info stream_output;
+
    struct {
       int index;
       int binding;
diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c
index 05702ada104..83e048216aa 100644
--- a/src/gallium/drivers/zink/zink_context.c
+++ b/src/gallium/drivers/zink/zink_context.c
@@ -366,7 +366,16 @@ zink_set_vertex_buffers(struct pipe_context *pctx,
    if (buffers) {
       for (int i = 0; i < num_buffers; ++i) {
          const struct pipe_vertex_buffer *vb = buffers + i;
+         struct zink_resource *res = zink_resource(vb->buffer.resource);
+
          ctx->gfx_pipeline_state.bindings[start_slot + i].stride = vb->stride;
+         if (res && res->needs_xfb_barrier) {
+            /* if we're binding a previously-used xfb buffer, we need cmd buffer synchronization to ensure
+             * that we use the right buffer data
+             */
+            pctx->flush(pctx, NULL, 0);
+            res->needs_xfb_barrier = false;
+         }
       }
    }
 
@@ -912,6 +921,9 @@ zink_flush(struct pipe_context *pctx,
    struct zink_batch *batch = zink_curr_batch(ctx);
    flush_batch(ctx);
 
+   if (zink_screen(pctx->screen)->have_EXT_transform_feedback && ctx->num_so_targets)
+      ctx->dirty_so_targets = true;
+
    if (pfence)
       zink_fence_reference(zink_screen(pctx->screen),
                            (struct zink_fence **)pfence,
@@ -1014,6 +1026,73 @@ zink_resource_copy_region(struct pipe_context *pctx,
       debug_printf("zink: TODO resource copy\n");
 }
 
+static struct pipe_stream_output_target *
+zink_create_stream_output_target(struct pipe_context *pctx,
+                                 struct pipe_resource *pres,
+                                 unsigned buffer_offset,
+                                 unsigned buffer_size)
+{
+   struct zink_so_target *t;
+   t = CALLOC_STRUCT(zink_so_target);
+   if (!t)
+      return NULL;
+
+   t->base.reference.count = 1;
+   t->base.context = pctx;
+   pipe_resource_reference(&t->base.buffer, pres);
+   t->base.buffer_offset = buffer_offset;
+   t->base.buffer_size = buffer_size;
+
+   /* using PIPE_BIND_CUSTOM here lets us create a custom pipe buffer resource,
+    * which allows us to differentiate and use VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT
+    * as we must for this case
+    */
+   t->counter_buffer = pipe_buffer_create(pctx->screen, PIPE_BIND_STREAM_OUTPUT | PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, 4);
+   if (!t->counter_buffer) {
+      FREE(t);
+      return NULL;
+   }
+
+   return &t->base;
+}
+
+static void
+zink_stream_output_target_destroy(struct pipe_context *pctx,
+                                  struct pipe_stream_output_target *psot)
+{
+   struct zink_so_target *t = (struct zink_so_target *)psot;
+   pipe_resource_reference(&t->counter_buffer, NULL);
+   pipe_resource_reference(&t->base.buffer, NULL);
+   FREE(t);
+}
+
+static void
+zink_set_stream_output_targets(struct pipe_context *pctx,
+                               unsigned num_targets,
+                               struct pipe_stream_output_target **targets,
+                               const unsigned *offsets)
+{
+   struct zink_context *ctx = zink_context(pctx);
+
+   if (num_targets == 0) {
+      for (unsigned i = 0; i < ctx->num_so_targets; i++)
+         pipe_so_target_reference(&ctx->so_targets[i], NULL);
+      ctx->num_so_targets = 0;
+   } else {
+      for (unsigned i = 0; i < num_targets; i++)
+         pipe_so_target_reference(&ctx->so_targets[i], targets[i]);
+      for (unsigned i = num_targets; i < ctx->num_so_targets; i++)
+         pipe_so_target_reference(&ctx->so_targets[i], NULL);
+      ctx->num_so_targets = num_targets;
+
+      /* emit memory barrier on next draw for synchronization */
+      if (offsets[0] == (unsigned)-1)
+         ctx->xfb_barrier = true;
+      /* TODO: possibly avoid rebinding on resume if resuming from same buffers? */
+      ctx->dirty_so_targets = true;
+   }
+}
+
 struct pipe_context *
 zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 {
@@ -1063,7 +1142,10 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
    ctx->base.resource_copy_region = zink_resource_copy_region;
    ctx->base.blit = zink_blit;
+   ctx->base.create_stream_output_target = zink_create_stream_output_target;
+   ctx->base.stream_output_target_destroy = zink_stream_output_target_destroy;
+   ctx->base.set_stream_output_targets = zink_set_stream_output_targets;
 
    ctx->base.flush_resource = zink_flush_resource;
    zink_context_surface_init(&ctx->base);
    zink_context_resource_init(&ctx->base);
 
diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h
index 76fa780b9f7..6affea61523 100644
--- a/src/gallium/drivers/zink/zink_context.h
+++ b/src/gallium/drivers/zink/zink_context.h
@@ -57,6 +57,20 @@ zink_sampler_view(struct pipe_sampler_view *pview)
    return (struct zink_sampler_view *)pview;
 }
 
+struct zink_so_target {
+   struct pipe_stream_output_target base;
+   struct pipe_resource *counter_buffer;
+   VkDeviceSize counter_buffer_offset;
+   uint32_t stride;
+   bool counter_buffer_valid;
+};
+
+static inline struct zink_so_target *
+zink_so_target(struct pipe_stream_output_target *so_target)
+{
+   return (struct zink_so_target *)so_target;
+}
+
 struct zink_context {
    struct pipe_context base;
    struct slab_child_pool transfer_pool;
@@ -111,6 +125,11 @@ struct zink_context {
    bool queries_disabled;
 
    struct pipe_resource *dummy_buffer;
+
+   uint32_t num_so_targets;
+   struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_OUTPUTS];
+   bool dirty_so_targets;
+   bool xfb_barrier;
 };
 
 static inline struct zink_context *
diff --git a/src/gallium/drivers/zink/zink_draw.c b/src/gallium/drivers/zink/zink_draw.c
index 553579acf64..1699a7b601b 100644
--- a/src/gallium/drivers/zink/zink_draw.c
+++ b/src/gallium/drivers/zink/zink_draw.c
@@ -36,6 +36,100 @@ allocate_descriptor_set(struct zink_screen *screen,
    return desc_set;
 }
 
+static void
+zink_emit_xfb_counter_barrier(struct zink_context *ctx)
+{
+   /* Between the pause and resume there needs to be a memory barrier for the counter buffers
+    * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
+    * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
+    * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
+    * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
+    *
+    * - from VK_EXT_transform_feedback spec
+    */
+   VkBufferMemoryBarrier barriers[PIPE_MAX_SO_OUTPUTS] = {};
+   unsigned barrier_count = 0;
+
+   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+      if (t->counter_buffer_valid) {
+         barriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+         barriers[i].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+         barriers[i].dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
+         barriers[i].buffer = zink_resource(t->counter_buffer)->buffer;
+         barriers[i].size = VK_WHOLE_SIZE;
+         barrier_count++;
+      }
+   }
+   struct zink_batch *batch = zink_batch_no_rp(ctx);
+   vkCmdPipelineBarrier(batch->cmdbuf,
+      VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
+      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
+      0,
+      0, NULL,
+      barrier_count, barriers,
+      0, NULL
+   );
+   ctx->xfb_barrier = false;
+}
+
+static void
+zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res)
+{
+   /* A pipeline barrier is required between using the buffers as
+    * transform feedback buffers and vertex buffers to
+    * ensure all writes to the transform feedback buffers are visible
+    * when the data is read as vertex attributes.
+    * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
+    * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
+    * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
+    * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively.
+    *
+    * - 20.3.1. Drawing Transform Feedback
+    */
+   VkBufferMemoryBarrier barriers[1] = {};
+   barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+   barriers[0].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+   barriers[0].dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
+   barriers[0].buffer = res->buffer;
+   barriers[0].size = VK_WHOLE_SIZE;
+   struct zink_batch *batch = zink_batch_no_rp(ctx);
+   zink_batch_reference_resoure(batch, res);
+   vkCmdPipelineBarrier(batch->cmdbuf,
+      VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
+      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+      0,
+      0, NULL,
+      ARRAY_SIZE(barriers), barriers,
+      0, NULL
+   );
+   res->needs_xfb_barrier = false;
+}
+
+static void
+zink_emit_stream_output_targets(struct pipe_context *pctx)
+{
+   struct zink_context *ctx = zink_context(pctx);
+   struct zink_screen *screen = zink_screen(pctx->screen);
+   struct zink_batch *batch = zink_curr_batch(ctx);
+   VkBuffer buffers[PIPE_MAX_SO_OUTPUTS];
+   VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS];
+   VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS];
+
+   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+      struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
+      buffers[i] = zink_resource(t->base.buffer)->buffer;
+      zink_batch_reference_resoure(batch, zink_resource(t->base.buffer));
+      buffer_offsets[i] = t->base.buffer_offset;
+      buffer_sizes[i] = t->base.buffer_size;
+   }
+
+   screen->vk_CmdBindTransformFeedbackBuffersEXT(batch->cmdbuf, 0, ctx->num_so_targets,
+                                                 buffers, buffer_offsets,
+                                                 buffer_sizes);
+   ctx->dirty_so_targets = false;
+}
+
 static void
 zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
 {
@@ -110,6 +204,9 @@ zink_draw_vbo(struct pipe_context *pctx,
    struct zink_context *ctx = zink_context(pctx);
    struct zink_screen *screen = zink_screen(pctx->screen);
    struct zink_rasterizer_state *rast_state = ctx->rast_state;
+   struct zink_so_target *so_target = zink_so_target(dinfo->count_from_stream_output);
+   VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
+   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {};
 
    if (dinfo->mode >= PIPE_PRIM_QUADS ||
        dinfo->mode == PIPE_PRIM_LINE_LOOP ||
@@ -175,6 +272,13 @@ zink_draw_vbo(struct pipe_context *pctx,
       if (!shader)
          continue;
 
+      if (i == MESA_SHADER_VERTEX && ctx->num_so_targets) {
+         for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+            struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+            t->stride = shader->stream_output.stride[i] * sizeof(uint32_t);
+         }
+      }
+
       for (int j = 0; j < shader->num_bindings; j++) {
          int index = shader->bindings[j].index;
         if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
@@ -227,6 +331,16 @@ zink_draw_vbo(struct pipe_context *pctx,
                                    VK_IMAGE_LAYOUT_GENERAL);
    }
 
+   if (ctx->xfb_barrier)
+      zink_emit_xfb_counter_barrier(ctx);
+
+   if (ctx->dirty_so_targets)
+      zink_emit_stream_output_targets(pctx);
+
+   if (so_target && zink_resource(so_target->base.buffer)->needs_xfb_barrier)
+      zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer));
+
+
    struct zink_batch *batch = zink_batch_rp(ctx);
 
    if (batch->descs_left < gfx_program->num_descriptors) {
@@ -295,6 +409,20 @@ zink_draw_vbo(struct pipe_context *pctx,
                            gfx_program->layout, 0, 1, &desc_set, 0, NULL);
    zink_bind_vertex_buffers(batch, ctx);
 
+   if (ctx->num_so_targets) {
+      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+         struct zink_resource *res = zink_resource(t->counter_buffer);
+         if (t->counter_buffer_valid) {
+            zink_batch_reference_resoure(batch, zink_resource(t->counter_buffer));
+            counter_buffers[i] = res->buffer;
+            counter_buffer_offsets[i] = t->counter_buffer_offset;
+         } else
+            counter_buffers[i] = NULL;
+      }
+      screen->vk_CmdBeginTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
+   }
+
    if (dinfo->index_size > 0) {
       assert(dinfo->index_size != 1);
       VkIndexType index_type = dinfo->index_size == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
@@ -304,9 +432,28 @@ zink_draw_vbo(struct pipe_context *pctx,
       vkCmdDrawIndexed(batch->cmdbuf,
          dinfo->count, dinfo->instance_count,
         dinfo->start, dinfo->index_bias, dinfo->start_instance);
-   } else
-      vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
+   } else {
+      if (so_target && screen->tf_props.transformFeedbackDraw) {
+         zink_batch_reference_resoure(batch, zink_resource(so_target->counter_buffer));
+         screen->vk_CmdDrawIndirectByteCountEXT(batch->cmdbuf, dinfo->instance_count, dinfo->start_instance,
+                                                zink_resource(so_target->counter_buffer)->buffer, so_target->counter_buffer_offset, 0,
+                                                MIN2(so_target->stride, screen->tf_props.maxTransformFeedbackBufferDataStride));
+      }
+      else
+         vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
+   }
 
    if (dinfo->index_size > 0 && dinfo->has_user_indices)
       pipe_resource_reference(&index_buffer, NULL);
+
+   if (ctx->num_so_targets) {
+      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+         counter_buffers[i] = zink_resource(t->counter_buffer)->buffer;
+         counter_buffer_offsets[i] = t->counter_buffer_offset;
+         t->counter_buffer_valid = true;
+         zink_resource(ctx->so_targets[i]->buffer)->needs_xfb_barrier = true;
+      }
+      screen->vk_CmdEndTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
+   }
 }
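As a reading aid for the zink_draw.c hunks above, the sketch below condenses the per-draw ordering the driver now records when transform feedback is being resumed. It is not part of the commit; the helper and its parameters are hypothetical, and the EXT entry points are taken as function pointers, mirroring how the driver loads them with vkGetDeviceProcAddr.

#include <vulkan/vulkan.h>

static void
sketch_xfb_resume_draw(VkCommandBuffer cmdbuf,
                       PFN_vkCmdBindTransformFeedbackBuffersEXT bind_xfb,
                       PFN_vkCmdBeginTransformFeedbackEXT begin_xfb,
                       PFN_vkCmdEndTransformFeedbackEXT end_xfb,
                       VkBuffer xfb_buf, VkDeviceSize offset, VkDeviceSize size,
                       VkBuffer counter_buf, VkDeviceSize counter_offset)
{
   /* 1) outside the render pass: make the counter value written by the previous
    *    vkCmdEndTransformFeedbackEXT visible to the resume (and to
    *    vkCmdDrawIndirectByteCountEXT, which reads it at the indirect-draw stage) */
   VkBufferMemoryBarrier bmb = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT,
      .dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = counter_buf,
      .offset = 0,
      .size = VK_WHOLE_SIZE,
   };
   vkCmdPipelineBarrier(cmdbuf,
                        VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
                        VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
                        0, 0, NULL, 1, &bmb, 0, NULL);

   /* 2) (re)bind the buffer range being captured into */
   bind_xfb(cmdbuf, 0, 1, &xfb_buf, &offset, &size);

   /* 3) inside the render pass: resume capture; passing the counter buffer makes
    *    the device continue from the byte count recorded when capture last ended */
   begin_xfb(cmdbuf, 0, 1, &counter_buf, &counter_offset);

   /* 4) ... vkCmdDraw*() calls that capture vertex output go here ... */

   /* 5) pause capture and write the updated byte count back to the counter buffer */
   end_xfb(cmdbuf, 0, 1, &counter_buf, &counter_offset);
}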
diff --git a/src/gallium/drivers/zink/zink_query.c b/src/gallium/drivers/zink/zink_query.c
index 1d0050306d1..10cb9b0750f 100644
--- a/src/gallium/drivers/zink/zink_query.c
+++ b/src/gallium/drivers/zink/zink_query.c
@@ -15,6 +15,7 @@ struct zink_query {
    unsigned curr_query, num_queries;
 
    VkQueryType vkqtype;
+   unsigned index;
    bool use_64bit;
    bool precise;
 
@@ -37,7 +38,11 @@ convert_query_type(unsigned query_type, bool *use_64bit, bool *precise)
       *use_64bit = true;
       return VK_QUERY_TYPE_TIMESTAMP;
    case PIPE_QUERY_PIPELINE_STATISTICS:
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
      return VK_QUERY_TYPE_PIPELINE_STATISTICS;
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      *use_64bit = true;
+      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
    default:
       debug_printf("unknown query: %s\n",
                    util_str_query_type(query_type, true));
@@ -56,6 +61,7 @@ zink_create_query(struct pipe_context *pctx,
    if (!query)
      return NULL;
 
+   query->index = index;
    query->type = query_type;
    query->vkqtype = convert_query_type(query_type, &query->use_64bit, &query->precise);
    if (query->vkqtype == -1)
@@ -67,6 +73,8 @@ zink_create_query(struct pipe_context *pctx,
    pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
    pool_create.queryType = query->vkqtype;
    pool_create.queryCount = query->num_queries;
+   if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
+      pool_create.pipelineStatistics = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT;
 
    VkResult status = vkCreateQueryPool(screen->dev, &pool_create, NULL, &query->query_pool);
    if (status != VK_SUCCESS) {
@@ -106,13 +114,20 @@ zink_destroy_query(struct pipe_context *pctx,
 }
 
 static void
-begin_query(struct zink_batch *batch, struct zink_query *q)
+begin_query(struct zink_context *ctx, struct zink_query *q)
 {
    VkQueryControlFlags flags = 0;
+   struct zink_batch *batch = zink_curr_batch(ctx);
    if (q->precise)
       flags |= VK_QUERY_CONTROL_PRECISE_BIT;
-
-   vkCmdBeginQuery(batch->cmdbuf, q->query_pool, q->curr_query, flags);
+   if (q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
+      zink_screen(ctx->base.screen)->vk_CmdBeginQueryIndexedEXT(batch->cmdbuf,
+                                                                q->query_pool,
+                                                                q->curr_query,
+                                                                flags,
+                                                                q->index);
+   else
+      vkCmdBeginQuery(batch->cmdbuf, q->query_pool, q->curr_query, flags);
 }
 
 static bool
@@ -134,17 +149,22 @@ zink_begin_query(struct pipe_context *pctx,
    vkCmdResetQueryPool(batch->cmdbuf, query->query_pool, 0,
                        MIN2(query->curr_query + 1, query->num_queries));
    query->curr_query = 0;
-   begin_query(batch, query);
+   begin_query(ctx, query);
    list_addtail(&query->active_list, &ctx->active_queries);
 
    return true;
 }
 
 static void
-end_query(struct zink_batch *batch, struct zink_query *q)
+end_query(struct zink_context *ctx, struct zink_query *q)
 {
+   struct zink_screen *screen = zink_screen(ctx->base.screen);
+   struct zink_batch *batch = zink_curr_batch(ctx);
    assert(q->type != PIPE_QUERY_TIMESTAMP);
-   vkCmdEndQuery(batch->cmdbuf, q->query_pool, q->curr_query);
+   if (q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
+      screen->vk_CmdEndQueryIndexedEXT(batch->cmdbuf, q->query_pool, q->curr_query, q->index);
+   else
+      vkCmdEndQuery(batch->cmdbuf, q->query_pool, q->curr_query);
 
    if (++q->curr_query == q->num_queries) {
       assert(0); /* need to reset pool! */
@@ -156,15 +176,15 @@ zink_end_query(struct pipe_context *pctx,
                struct pipe_query *q)
 {
    struct zink_context *ctx = zink_context(pctx);
-   struct zink_batch *batch = zink_curr_batch(ctx);
    struct zink_query *query = (struct zink_query *)q;
 
    if (query->type == PIPE_QUERY_TIMESTAMP) {
       assert(query->curr_query == 0);
+      struct zink_batch *batch = zink_curr_batch(ctx);
       vkCmdWriteTimestamp(batch->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                           query->query_pool, 0);
    } else {
-      end_query(batch, query);
+      end_query(ctx, query);
       list_delinit(&query->active_list);
    }
 
@@ -194,17 +214,36 @@ zink_get_query_result(struct pipe_context *pctx,
 //   union pipe_query_result results[100];
    uint64_t results[100];
    memset(results, 0, sizeof(results));
-   assert(query->curr_query <= ARRAY_SIZE(results));
-   if (vkGetQueryPoolResults(screen->dev, query->query_pool,
-                             0, query->curr_query,
-                             sizeof(results),
-                             results,
-                             sizeof(uint64_t),
-                             flags) != VK_SUCCESS)
-      return false;
+   int num_results;
+   if (query->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT) {
+      char tf_result[16] = {};
+      /* this query emits 2 values */
+      assert(query->curr_query <= ARRAY_SIZE(results) / 2);
+      num_results = query->curr_query * 2;
+      VkResult status = vkGetQueryPoolResults(screen->dev, query->query_pool,
+                                              0, query->curr_query,
+                                              sizeof(results),
+                                              results,
+                                              sizeof(uint64_t),
+                                              flags);
+      if (status != VK_SUCCESS)
+         return false;
+      memcpy(result, tf_result + (query->type == PIPE_QUERY_PRIMITIVES_GENERATED ? 8 : 0), 8);
+   } else {
+      assert(query->curr_query <= ARRAY_SIZE(results));
+      num_results = query->curr_query;
+      VkResult status = vkGetQueryPoolResults(screen->dev, query->query_pool,
+                                              0, query->curr_query,
+                                              sizeof(results),
+                                              results,
+                                              sizeof(uint64_t),
+                                              flags);
+      if (status != VK_SUCCESS)
+         return false;
+   }
 
    util_query_clear_result(result, query->type);
-   for (int i = 0; i < query->curr_query; ++i) {
+   for (int i = 0; i < num_results; ++i) {
       switch (query->type) {
       case PIPE_QUERY_OCCLUSION_PREDICATE:
       case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
@@ -217,6 +256,18 @@ zink_get_query_result(struct pipe_context *pctx,
       case PIPE_QUERY_OCCLUSION_COUNTER:
          result->u64 += results[i];
          break;
+      case PIPE_QUERY_PRIMITIVES_GENERATED:
+         result->u32 += results[i];
+         break;
+      case PIPE_QUERY_PRIMITIVES_EMITTED:
+         /* A query pool created with this type will capture 2 integers -
+          * numPrimitivesWritten and numPrimitivesNeeded -
+          * for the specified vertex stream output from the last vertex processing stage.
+          * - from VK_EXT_transform_feedback spec
+          */
+         result->u64 += results[i];
+         i++;
+         break;
       default:
          debug_printf("unhangled query type: %s\n",
@@ -233,7 +284,7 @@ zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch)
 {
    struct zink_query *query;
    LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, active_list) {
-      end_query(batch, query);
+      end_query(ctx, query);
    }
 }
 
@@ -243,7 +294,7 @@ zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch)
    struct zink_query *query;
    LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, active_list) {
      vkCmdResetQueryPool(batch->cmdbuf, query->query_pool, query->curr_query, 1);
-      begin_query(batch, query);
+      begin_query(ctx, query);
    }
 }
 
diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c
index a52d8bb6c70..5ae75dba2be 100644
--- a/src/gallium/drivers/zink/zink_resource.c
+++ b/src/gallium/drivers/zink/zink_resource.c
@@ -122,6 +122,12 @@ resource_create(struct pipe_screen *pscreen,
       if (templ->bind & PIPE_BIND_COMMAND_ARGS_BUFFER)
          bci.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
 
+      if (templ->bind == (PIPE_BIND_STREAM_OUTPUT | PIPE_BIND_CUSTOM)) {
+         bci.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
+      } else if (templ->bind & PIPE_BIND_STREAM_OUTPUT) {
+         bci.usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
+      }
+
       if (vkCreateBuffer(screen->dev, &bci, NULL, &res->buffer) != VK_SUCCESS) {
          FREE(res);
diff --git a/src/gallium/drivers/zink/zink_resource.h b/src/gallium/drivers/zink/zink_resource.h
index 65e5e19dc73..9bca4c53a43 100644
--- a/src/gallium/drivers/zink/zink_resource.h
+++ b/src/gallium/drivers/zink/zink_resource.h
@@ -49,6 +49,8 @@ struct zink_resource {
 
    struct sw_displaytarget *dt;
    unsigned dt_stride;
+
+   bool needs_xfb_barrier;
 };
 
 struct zink_transfer {
diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c
index 0fe77d9e2d0..2510715454f 100644
--- a/src/gallium/drivers/zink/zink_screen.c
+++ b/src/gallium/drivers/zink/zink_screen.c
@@ -725,7 +725,14 @@ load_device_extensions(struct zink_screen *screen)
       if (!screen->vk_##x) \
          return false; \
    } while (0)
-
+   if (screen->have_EXT_transform_feedback) {
+      GET_PROC_ADDR(CmdBindTransformFeedbackBuffersEXT);
+      GET_PROC_ADDR(CmdBeginTransformFeedbackEXT);
+      GET_PROC_ADDR(CmdEndTransformFeedbackEXT);
+      GET_PROC_ADDR(CmdBeginQueryIndexedEXT);
+      GET_PROC_ADDR(CmdEndQueryIndexedEXT);
+      GET_PROC_ADDR(CmdDrawIndirectByteCountEXT);
+   }
    if (screen->have_KHR_external_memory_fd)
      GET_PROC_ADDR(GetMemoryFdKHR);
 
diff --git a/src/gallium/drivers/zink/zink_screen.h b/src/gallium/drivers/zink/zink_screen.h
index e50a5f32370..e1138c9249f 100644
--- a/src/gallium/drivers/zink/zink_screen.h
+++ b/src/gallium/drivers/zink/zink_screen.h
@@ -48,10 +48,12 @@ struct zink_screen {
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceFeatures feats;
    VkPhysicalDeviceMemoryProperties mem_props;
+   VkPhysicalDeviceTransformFeedbackPropertiesEXT tf_props;
 
    bool have_KHR_maintenance1;
    bool have_KHR_external_memory_fd;
    bool have_EXT_conditional_rendering;
+   bool have_EXT_transform_feedback;
 
    bool have_X8_D24_UNORM_PACK32;
    bool have_D24_UNORM_S8_UINT;
@@ -62,6 +64,13 @@ struct zink_screen {
    PFN_vkGetMemoryFdKHR vk_GetMemoryFdKHR;
    PFN_vkCmdBeginConditionalRenderingEXT vk_CmdBeginConditionalRenderingEXT;
    PFN_vkCmdEndConditionalRenderingEXT vk_CmdEndConditionalRenderingEXT;
+
+   PFN_vkCmdBindTransformFeedbackBuffersEXT vk_CmdBindTransformFeedbackBuffersEXT;
+   PFN_vkCmdBeginTransformFeedbackEXT vk_CmdBeginTransformFeedbackEXT;
+   PFN_vkCmdEndTransformFeedbackEXT vk_CmdEndTransformFeedbackEXT;
+   PFN_vkCmdBeginQueryIndexedEXT vk_CmdBeginQueryIndexedEXT;
+   PFN_vkCmdEndQueryIndexedEXT vk_CmdEndQueryIndexedEXT;
+   PFN_vkCmdDrawIndirectByteCountEXT vk_CmdDrawIndirectByteCountEXT;
 };
 
 static inline struct zink_screen *
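As a usage note on the new context hooks, the sketch below shows roughly how a gallium state tracker drives the stream-output interface that zink now implements. It is illustrative only and not part of the commit; "xfb_buf", "tgt", and the helper name are hypothetical. An offset of (unsigned)-1 passed to set_stream_output_targets means "append to the buffer's existing contents", which is the resume path that sets ctx->xfb_barrier above.

#include "pipe/p_context.h"
#include "pipe/p_state.h"

static void
sketch_capture_and_resume(struct pipe_context *pctx, struct pipe_resource *xfb_buf)
{
   /* wrap the buffer in a stream-output target (zink_create_stream_output_target) */
   struct pipe_stream_output_target *tgt =
      pctx->create_stream_output_target(pctx, xfb_buf, 0, xfb_buf->width0);

   unsigned offsets[1] = { 0 };     /* 0: start writing at the beginning of the buffer */
   pctx->set_stream_output_targets(pctx, 1, &tgt, offsets);
   /* ... draws captured into xfb_buf ... */

   offsets[0] = (unsigned)-1;       /* ~0: resume and append after the data already
                                     *     captured; in zink this sets ctx->xfb_barrier */
   pctx->set_stream_output_targets(pctx, 1, &tgt, offsets);
   /* ... further draws appended to the earlier output ... */

   pctx->set_stream_output_targets(pctx, 0, NULL, NULL);   /* unbind */
   pctx->stream_output_target_destroy(pctx, tgt);
}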