X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fzink%2Fzink_draw.c;h=b103d089df3bb1cf647073ff5afd11264cead8ad;hb=919818a8a0056bbd539566e930563218e2e30fd9;hp=1d276502ca9f78bca9eba7e67d261be5796d89d0;hpb=4c1cef68cf767057211ef589278523caffeb3d5b;p=mesa.git

diff --git a/src/gallium/drivers/zink/zink_draw.c b/src/gallium/drivers/zink/zink_draw.c
index 1d276502ca9..b103d089df3 100644
--- a/src/gallium/drivers/zink/zink_draw.c
+++ b/src/gallium/drivers/zink/zink_draw.c
@@ -11,6 +11,7 @@
 #include "util/u_helpers.h"
 #include "util/u_inlines.h"
 #include "util/u_prim.h"
+#include "util/u_prim_restart.h"
 
 static VkDescriptorSet
 allocate_descriptor_set(struct zink_screen *screen,
@@ -36,6 +37,104 @@ allocate_descriptor_set(struct zink_screen *screen,
    return desc_set;
 }
 
+/* Emit the counter-buffer barrier for every stream output target whose counter buffer is valid. */
+static void
+zink_emit_xfb_counter_barrier(struct zink_context *ctx)
+{
+   /* Between the pause and resume there needs to be a memory barrier for the counter buffers
+    * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
+    * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
+    * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
+    * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
+    *
+    * - from VK_EXT_transform_feedback spec
+    */
+   VkBufferMemoryBarrier barriers[PIPE_MAX_SO_OUTPUTS] = {};
+   unsigned barrier_count = 0;
+
+   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+      if (t->counter_buffer_valid) {
+         /* fill slots densely: only the first barrier_count entries are submitted */
+         barriers[barrier_count].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+         barriers[barrier_count].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+         barriers[barrier_count].dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
+         barriers[barrier_count].buffer = zink_resource(t->counter_buffer)->buffer;
+         barriers[barrier_count].size = VK_WHOLE_SIZE;
+         barrier_count++;
+      }
+   }
+   struct zink_batch *batch = zink_batch_no_rp(ctx);
+   vkCmdPipelineBarrier(batch->cmdbuf,
+      VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
+      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
+      0,
+      0, NULL,
+      barrier_count, barriers,
+      0, NULL
+   );
+   ctx->xfb_barrier = false;
+}
+
+/* Emit the barrier that lets res be read as a vertex buffer after transform feedback wrote it. */
+static void
+zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res)
+{
+   /* A pipeline barrier is required between using the buffers as
+    * transform feedback buffers and vertex buffers to
+    * ensure all writes to the transform feedback buffers are visible
+    * when the data is read as vertex attributes.
+    * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
+    * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
+    * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
+    * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively.
+    *
+    * - 20.3.1. Drawing Transform Feedback
+    */
+   VkBufferMemoryBarrier barriers[1] = {};
+   barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+   barriers[0].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT;
+   barriers[0].dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
+   barriers[0].buffer = res->buffer;
+   barriers[0].size = VK_WHOLE_SIZE;
+   struct zink_batch *batch = zink_batch_no_rp(ctx);
+   zink_batch_reference_resoure(batch, res);
+   vkCmdPipelineBarrier(batch->cmdbuf,
+      VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
+      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+      0,
+      0, NULL,
+      ARRAY_SIZE(barriers), barriers,
+      0, NULL
+   );
+   res->needs_xfb_barrier = false;
+}
+
+/* Bind every current stream output target as a transform feedback buffer on the current batch. */
+static void
+zink_emit_stream_output_targets(struct pipe_context *pctx)
+{
+   struct zink_context *ctx = zink_context(pctx);
+   struct zink_screen *screen = zink_screen(pctx->screen);
+   struct zink_batch *batch = zink_curr_batch(ctx);
+   VkBuffer buffers[PIPE_MAX_SO_OUTPUTS];
+   VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS];
+   VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS];
+
+   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+      struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
+      buffers[i] = zink_resource(t->base.buffer)->buffer;
+      zink_batch_reference_resoure(batch, zink_resource(t->base.buffer));
+      buffer_offsets[i] = t->base.buffer_offset;
+      buffer_sizes[i] = t->base.buffer_size;
+   }
+
+   screen->vk_CmdBindTransformFeedbackBuffersEXT(batch->cmdbuf, 0, ctx->num_so_targets,
+                                                 buffers, buffer_offsets,
+                                                 buffer_sizes);
+   ctx->dirty_so_targets = false;
+}
+
 static void
 zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
 {
@@ -44,11 +143,16 @@ zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
    const struct zink_vertex_elements_state *elems = ctx->element_state;
    for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
       struct pipe_vertex_buffer *vb = ctx->buffers + ctx->element_state->binding_map[i];
-      assert(vb && vb->buffer.resource);
-      struct zink_resource *res = zink_resource(vb->buffer.resource);
-      buffers[i] = res->buffer;
-      buffer_offsets[i] = vb->buffer_offset;
-      zink_batch_reference_resoure(batch, res);
+      assert(vb);
+      if (vb->buffer.resource) {
+         struct zink_resource *res = zink_resource(vb->buffer.resource);
+         buffers[i] = res->buffer;
+         buffer_offsets[i] = vb->buffer_offset;
+         zink_batch_reference_resoure(batch, res);
+      } else {
+         buffers[i] = zink_resource(ctx->dummy_buffer)->buffer;
+         buffer_offsets[i] = 0;
+      }
    }
 
    if (elems->hw_state.num_bindings > 0)
@@ -60,19 +164,18 @@ zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
 static struct zink_gfx_program *
 get_gfx_program(struct zink_context *ctx)
 {
-   if (ctx->dirty_program) {
+   if (ctx->dirty_shader_stages) {
       struct hash_entry *entry = _mesa_hash_table_search(ctx->program_cache,
                                                          ctx->gfx_stages);
       if (!entry) {
          struct zink_gfx_program *prog;
-         prog = zink_create_gfx_program(zink_screen(ctx->base.screen),
-                                        ctx->gfx_stages);
-         entry = _mesa_hash_table_insert(ctx->program_cache, prog->stages, prog);
+         prog = zink_create_gfx_program(ctx, ctx->gfx_stages);
+         entry = _mesa_hash_table_insert(ctx->program_cache, prog->shaders, prog);
          if (!entry)
             return NULL;
       }
       ctx->curr_program = entry->data;
-      ctx->dirty_program = false;
+      ctx->dirty_shader_stages = 0;
    }
 
    assert(ctx->curr_program);
@@ -98,6 +201,13 @@ line_width_needed(enum pipe_prim_type reduced_prim,
    }
 }
 
+/* Primitive types zink_draw_vbo() draws directly with primitive restart enabled. */
+static inline bool
+restart_supported(enum pipe_prim_type mode)
+{
+   return mode == PIPE_PRIM_LINE_STRIP || mode == PIPE_PRIM_TRIANGLE_STRIP || mode == PIPE_PRIM_TRIANGLE_FAN;
+}
+
 void
 zink_draw_vbo(struct pipe_context *pctx,
               const struct pipe_draw_info *dinfo)
@@ -105,10 +215,18 @@ zink_draw_vbo(struct pipe_context *pctx,
    struct zink_context *ctx = zink_context(pctx);
    struct zink_screen *screen = zink_screen(pctx->screen);
    struct zink_rasterizer_state *rast_state = ctx->rast_state;
+   struct zink_so_target *so_target = zink_so_target(dinfo->count_from_stream_output);
+   VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
+   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {};
+   bool need_index_buffer_unref = false;
+
+   if (dinfo->primitive_restart && !restart_supported(dinfo->mode)) {
+      util_draw_vbo_without_prim_restart(pctx, dinfo);
+      return;
+   }
 
    if (dinfo->mode >= PIPE_PRIM_QUADS ||
-       dinfo->mode == PIPE_PRIM_LINE_LOOP ||
-       dinfo->index_size == 1) {
+       dinfo->mode == PIPE_PRIM_LINE_LOOP) {
       if (!u_trim_pipe_prim(dinfo->mode, (unsigned *)&dinfo->count))
          return;
 
@@ -121,6 +239,10 @@ zink_draw_vbo(struct pipe_context *pctx,
    if (!gfx_program)
       return;
 
+   if (ctx->gfx_pipeline_state.primitive_restart != !!dinfo->primitive_restart)
+      ctx->gfx_pipeline_state.hash = 0;
+   ctx->gfx_pipeline_state.primitive_restart = !!dinfo->primitive_restart;
+
    VkPipeline pipeline = zink_get_gfx_pipeline(screen, gfx_program,
                                                &ctx->gfx_pipeline_state,
                                                dinfo->mode);
@@ -148,13 +270,22 @@ zink_draw_vbo(struct pipe_context *pctx,
    unsigned index_offset = 0;
    struct pipe_resource *index_buffer = NULL;
    if (dinfo->index_size > 0) {
-      if (dinfo->has_user_indices) {
-         if (!util_upload_index_buffer(pctx, dinfo, &index_buffer, &index_offset)) {
-            debug_printf("util_upload_index_buffer() failed\n");
-            return;
-         }
-      } else
-         index_buffer = dinfo->index.resource;
+      uint32_t restart_index = util_prim_restart_index_from_size(dinfo->index_size);
+      /* rewrite the indices when the restart index differs from restart_index, or when
+       * 1-byte indices are used without EXT_index_type_uint8 (index_size is in bytes) */
+      if ((dinfo->primitive_restart && (dinfo->restart_index != restart_index)) ||
+          (!screen->have_EXT_index_type_uint8 && dinfo->index_size == 1)) {
+         util_translate_prim_restart_ib(pctx, dinfo, &index_buffer);
+         need_index_buffer_unref = true;
+      } else {
+         if (dinfo->has_user_indices) {
+            if (!util_upload_index_buffer(pctx, dinfo, &index_buffer, &index_offset, 4)) {
+               debug_printf("util_upload_index_buffer() failed\n");
+               return;
+            }
+         } else
+            index_buffer = dinfo->index.resource;
+      }
    }
 
    VkWriteDescriptorSet wds[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS + PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
@@ -170,6 +301,13 @@ zink_draw_vbo(struct pipe_context *pctx,
       if (!shader)
          continue;
 
+      if (i == MESA_SHADER_VERTEX && ctx->num_so_targets) {
+         for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+            struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+            t->stride = shader->streamout.so_info.stride[i] * sizeof(uint32_t);
+         }
+      }
+
       for (int j = 0; j < shader->num_bindings; j++) {
          int index = shader->bindings[j].index;
          if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
@@ -222,6 +360,16 @@ zink_draw_vbo(struct pipe_context *pctx,
                                VK_IMAGE_LAYOUT_GENERAL);
    }
 
+   if (ctx->xfb_barrier)
+      zink_emit_xfb_counter_barrier(ctx);
+
+   if (ctx->dirty_so_targets)
+      zink_emit_stream_output_targets(pctx);
+
+   if (so_target && zink_resource(so_target->base.buffer)->needs_xfb_barrier)
+      zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer));
+
+
    batch = zink_batch_rp(ctx);
 
    if (batch->descs_left < gfx_program->num_descriptors) {
@@ -229,6 +377,7 @@ zink_draw_vbo(struct pipe_context *pctx,
       batch = zink_batch_rp(ctx);
       assert(batch->descs_left >= gfx_program->num_descriptors);
    }
+   zink_batch_reference_program(batch, ctx->curr_program);
 
    VkDescriptorSet desc_set = allocate_descriptor_set(screen, batch,
                                                       gfx_program);
@@ -290,18 +439,68 @@ zink_draw_vbo(struct pipe_context *pctx,
                            gfx_program->layout, 0, 1, &desc_set, 0, NULL);
    zink_bind_vertex_buffers(batch, ctx);
 
+   if (ctx->num_so_targets) {
+      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+         struct zink_resource *res = zink_resource(t->counter_buffer);
+         if (t->counter_buffer_valid) {
+            zink_batch_reference_resoure(batch, zink_resource(t->counter_buffer));
+            counter_buffers[i] = res->buffer;
+            counter_buffer_offsets[i] = t->counter_buffer_offset;
+         } else
+            counter_buffers[i] = VK_NULL_HANDLE;
+      }
+      screen->vk_CmdBeginTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
+   }
+
    if (dinfo->index_size > 0) {
-      assert(dinfo->index_size != 1);
-      VkIndexType index_type = dinfo->index_size == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
+      VkIndexType index_type;
+      unsigned index_size = dinfo->index_size;
+      if (need_index_buffer_unref)
+         /* index buffer will have been promoted from uint8 to uint16 in this case */
+         index_size = MAX2(index_size, 2);
+      switch (index_size) {
+      case 1:
+         assert(screen->have_EXT_index_type_uint8);
+         index_type = VK_INDEX_TYPE_UINT8_EXT;
+         break;
+      case 2:
+         index_type = VK_INDEX_TYPE_UINT16;
+         break;
+      case 4:
+         index_type = VK_INDEX_TYPE_UINT32;
+         break;
+      default:
+         unreachable("unknown index size!");
+      }
       struct zink_resource *res = zink_resource(index_buffer);
       vkCmdBindIndexBuffer(batch->cmdbuf, res->buffer, index_offset, index_type);
       zink_batch_reference_resoure(batch, res);
       vkCmdDrawIndexed(batch->cmdbuf,
          dinfo->count, dinfo->instance_count,
-         dinfo->start, dinfo->index_bias, dinfo->start_instance);
-   } else
-      vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
+         need_index_buffer_unref ? 0 : dinfo->start, dinfo->index_bias, dinfo->start_instance);
+   } else {
+      if (so_target && screen->tf_props.transformFeedbackDraw) {
+         zink_batch_reference_resoure(batch, zink_resource(so_target->counter_buffer));
+         screen->vk_CmdDrawIndirectByteCountEXT(batch->cmdbuf, dinfo->instance_count, dinfo->start_instance,
+            zink_resource(so_target->counter_buffer)->buffer, so_target->counter_buffer_offset, 0,
+            MIN2(so_target->stride, screen->tf_props.maxTransformFeedbackBufferDataStride));
+      }
+      else
+         vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
+   }
 
-   if (dinfo->index_size > 0 && dinfo->has_user_indices)
+   if (dinfo->index_size > 0 && (dinfo->has_user_indices || need_index_buffer_unref))
       pipe_resource_reference(&index_buffer, NULL);
+
+   if (ctx->num_so_targets) {
+      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+         counter_buffers[i] = zink_resource(t->counter_buffer)->buffer;
+         counter_buffer_offsets[i] = t->counter_buffer_offset;
+         t->counter_buffer_valid = true;
+         zink_resource(ctx->so_targets[i]->buffer)->needs_xfb_barrier = true;
+      }
+      screen->vk_CmdEndTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
+   }
 }