zink: implement transform feedback support to finish off opengl 3.0
[mesa.git] / src/gallium/drivers/zink/zink_draw.c
#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_program.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"

#include "indices/u_primconvert.h"
#include "util/hash_table.h"
#include "util/u_debug.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"

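/* Allocates a one-shot descriptor set for this program out of the batch's
 * descriptor pool. The caller is responsible for checking descs_left first;
 * zink_draw_vbo below flushes the batch when the pool runs low.
 */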
static VkDescriptorSet
allocate_descriptor_set(struct zink_screen *screen,
                        struct zink_batch *batch,
                        struct zink_gfx_program *prog)
{
   assert(batch->descs_left >= prog->num_descriptors);
   VkDescriptorSetAllocateInfo dsai;
   memset((void *)&dsai, 0, sizeof(dsai));
   dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
   dsai.pNext = NULL;
   dsai.descriptorPool = batch->descpool;
   dsai.descriptorSetCount = 1;
   dsai.pSetLayouts = &prog->dsl;

   VkDescriptorSet desc_set;
   if (vkAllocateDescriptorSets(screen->dev, &dsai, &desc_set) != VK_SUCCESS) {
      debug_printf("ZINK: failed to allocate descriptor set :/\n");
      return VK_NULL_HANDLE;
   }

   batch->descs_left -= prog->num_descriptors;
   return desc_set;
}

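/* Emitted between pausing and resuming transform feedback, so that the
 * counter-buffer values written by vkCmdEndTransformFeedbackEXT are visible
 * to the vkCmdBeginTransformFeedbackEXT that resumes with them.
 */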
static void
zink_emit_xfb_counter_barrier(struct zink_context *ctx)
{
   /* Between the pause and resume there needs to be a memory barrier for the counter buffers
    * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
    * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
    * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
    * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
    *
    * - from VK_EXT_transform_feedback spec
    */
   VkBufferMemoryBarrier barriers[PIPE_MAX_SO_OUTPUTS] = {};
   unsigned barrier_count = 0;

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
      if (t->counter_buffer_valid) {
         barriers[barrier_count].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
         barriers[barrier_count].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
         barriers[barrier_count].dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
         barriers[barrier_count].buffer = zink_resource(t->counter_buffer)->buffer;
         barriers[barrier_count].size = VK_WHOLE_SIZE;
         barrier_count++;
      }
   }
   struct zink_batch *batch = zink_batch_no_rp(ctx);
   vkCmdPipelineBarrier(batch->cmdbuf,
                        VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
                        VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
                        0,
                        0, NULL,
                        barrier_count, barriers,
                        0, NULL);
   ctx->xfb_barrier = false;
}

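/* Emitted before a draw that sources a buffer previously written by
 * transform feedback, so that the XFB writes are visible to the
 * vertex-input stage.
 */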
static void
zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res)
{
   /* A pipeline barrier is required between using the buffers as
    * transform feedback buffers and vertex buffers to
    * ensure all writes to the transform feedback buffers are visible
    * when the data is read as vertex attributes.
    * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
    * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
    * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
    * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively.
    *
    * - 20.3.1. Drawing Transform Feedback
    */
   VkBufferMemoryBarrier barriers[1] = {};
   barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
   barriers[0].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT;
   barriers[0].dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
   barriers[0].buffer = res->buffer;
   barriers[0].size = VK_WHOLE_SIZE;
   struct zink_batch *batch = zink_batch_no_rp(ctx);
   zink_batch_reference_resoure(batch, res);
   vkCmdPipelineBarrier(batch->cmdbuf,
                        VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
                        VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
                        0,
                        0, NULL,
                        ARRAY_SIZE(barriers), barriers,
                        0, NULL);
   res->needs_xfb_barrier = false;
}

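/* (Re)binds every active stream-output target as a transform feedback
 * buffer; called whenever the set of targets has changed.
 */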
static void
zink_emit_stream_output_targets(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_batch *batch = zink_curr_batch(ctx);
   VkBuffer buffers[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS];

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
      buffers[i] = zink_resource(t->base.buffer)->buffer;
      zink_batch_reference_resoure(batch, zink_resource(t->base.buffer));
      buffer_offsets[i] = t->base.buffer_offset;
      buffer_sizes[i] = t->base.buffer_size;
   }

   screen->vk_CmdBindTransformFeedbackBuffersEXT(batch->cmdbuf, 0, ctx->num_so_targets,
                                                 buffers, buffer_offsets,
                                                 buffer_sizes);
   ctx->dirty_so_targets = false;
}

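/* Binds one vertex buffer per hardware binding; bindings without a backing
 * resource get the context's dummy buffer so the binding stays valid.
 */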
static void
zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
{
   VkBuffer buffers[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
   const struct zink_vertex_elements_state *elems = ctx->element_state;
   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->buffers + elems->binding_map[i];
      assert(vb);
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         buffers[i] = res->buffer;
         buffer_offsets[i] = vb->buffer_offset;
         zink_batch_reference_resoure(batch, res);
      } else {
         buffers[i] = zink_resource(ctx->dummy_buffer)->buffer;
         buffer_offsets[i] = 0;
      }
   }

   if (elems->hw_state.num_bindings > 0)
      vkCmdBindVertexBuffers(batch->cmdbuf, 0,
                             elems->hw_state.num_bindings,
                             buffers, buffer_offsets);
}

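/* Looks up (or creates and caches) the graphics program for the currently
 * bound shader stages; the cache is keyed on the gfx_stages array.
 */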
static struct zink_gfx_program *
get_gfx_program(struct zink_context *ctx)
{
   if (ctx->dirty_program) {
      struct hash_entry *entry = _mesa_hash_table_search(ctx->program_cache,
                                                         ctx->gfx_stages);
      if (!entry) {
         struct zink_gfx_program *prog;
         prog = zink_create_gfx_program(zink_screen(ctx->base.screen),
                                        ctx->gfx_stages);
         entry = _mesa_hash_table_insert(ctx->program_cache, prog->stages, prog);
         if (!entry)
            return NULL;
      }
      ctx->curr_program = entry->data;
      ctx->dirty_program = false;
   }

   assert(ctx->curr_program);
   return ctx->curr_program;
}

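/* vkCmdSetLineWidth only matters when the pipeline actually rasterizes
 * lines, either directly or via line-mode polygons.
 */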
static bool
line_width_needed(enum pipe_prim_type reduced_prim,
                  VkPolygonMode polygon_mode)
{
   switch (reduced_prim) {
   case PIPE_PRIM_POINTS:
      return false;

   case PIPE_PRIM_LINES:
      return true;

   case PIPE_PRIM_TRIANGLES:
      return polygon_mode == VK_POLYGON_MODE_LINE;

   default:
      unreachable("unexpected reduced prim");
   }
}

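/* Main draw entry point: translates a gallium draw call into Vulkan
 * commands, handling primitive conversion, descriptor updates, dynamic
 * state, and transform feedback along the way.
 */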
void
zink_draw_vbo(struct pipe_context *pctx,
              const struct pipe_draw_info *dinfo)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_rasterizer_state *rast_state = ctx->rast_state;
   struct zink_so_target *so_target = zink_so_target(dinfo->count_from_stream_output);
   VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {};

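   /* Vulkan has no quads, quad strips, polygons, line loops or 8-bit
    * indices, so let u_primconvert rewrite those draws into something we
    * can emit directly.
    */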
   if (dinfo->mode >= PIPE_PRIM_QUADS ||
       dinfo->mode == PIPE_PRIM_LINE_LOOP ||
       dinfo->index_size == 1) {
      if (!u_trim_pipe_prim(dinfo->mode, (unsigned *)&dinfo->count))
         return;

      util_primconvert_save_rasterizer_state(ctx->primconvert, &rast_state->base);
      util_primconvert_draw_vbo(ctx->primconvert, dinfo);
      return;
   }

   struct zink_gfx_program *gfx_program = get_gfx_program(ctx);
   if (!gfx_program)
      return;

   VkPipeline pipeline = zink_get_gfx_pipeline(screen, gfx_program,
                                               &ctx->gfx_pipeline_state,
                                               dinfo->mode);

   enum pipe_prim_type reduced_prim = u_reduced_prim(dinfo->mode);

   bool depth_bias = false;
   switch (reduced_prim) {
   case PIPE_PRIM_POINTS:
      depth_bias = rast_state->offset_point;
      break;

   case PIPE_PRIM_LINES:
      depth_bias = rast_state->offset_line;
      break;

   case PIPE_PRIM_TRIANGLES:
      depth_bias = rast_state->offset_tri;
      break;

   default:
      unreachable("unexpected reduced prim");
   }

   unsigned index_offset = 0;
   struct pipe_resource *index_buffer = NULL;
   if (dinfo->index_size > 0) {
      if (dinfo->has_user_indices) {
         if (!util_upload_index_buffer(pctx, dinfo, &index_buffer, &index_offset, 4)) {
            debug_printf("util_upload_index_buffer() failed\n");
            return;
         }
      } else
         index_buffer = dinfo->index.resource;
   }

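   /* Walk the bound shader stages, record the transform feedback strides
    * from the vertex shader, and build the descriptor writes for every UBO
    * and sampler view the shaders use.
    */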
   VkWriteDescriptorSet wds[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS + PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
   VkDescriptorBufferInfo buffer_infos[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS];
   VkDescriptorImageInfo image_infos[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
   int num_wds = 0, num_buffer_info = 0, num_image_info = 0;

   struct zink_resource *transitions[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
   int num_transitions = 0;

   for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) {
      struct zink_shader *shader = ctx->gfx_stages[i];
      if (!shader)
         continue;

      if (i == MESA_SHADER_VERTEX && ctx->num_so_targets) {
         for (unsigned slot = 0; slot < ctx->num_so_targets; slot++) {
            struct zink_so_target *t = zink_so_target(ctx->so_targets[slot]);
            t->stride = shader->stream_output.stride[slot] * sizeof(uint32_t);
         }
      }

      for (int j = 0; j < shader->num_bindings; j++) {
         int index = shader->bindings[j].index;
         if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            assert(ctx->ubos[i][index].buffer_size > 0);
            assert(ctx->ubos[i][index].buffer_size <= screen->props.limits.maxUniformBufferRange);
            assert(ctx->ubos[i][index].buffer);
            struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer);
            buffer_infos[num_buffer_info].buffer = res->buffer;
            buffer_infos[num_buffer_info].offset = ctx->ubos[i][index].buffer_offset;
            buffer_infos[num_buffer_info].range = ctx->ubos[i][index].buffer_size;
            wds[num_wds].pBufferInfo = buffer_infos + num_buffer_info;
            ++num_buffer_info;
         } else {
            struct pipe_sampler_view *psampler_view = ctx->image_views[i][index];
            assert(psampler_view);
            struct zink_sampler_view *sampler_view = zink_sampler_view(psampler_view);

            struct zink_resource *res = zink_resource(psampler_view->texture);
            VkImageLayout layout = res->layout;
            if (layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL &&
                layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
                layout != VK_IMAGE_LAYOUT_GENERAL) {
               transitions[num_transitions++] = res;
               layout = VK_IMAGE_LAYOUT_GENERAL;
            }
            image_infos[num_image_info].imageLayout = layout;
            image_infos[num_image_info].imageView = sampler_view->image_view;
            image_infos[num_image_info].sampler = ctx->samplers[i][index];
            wds[num_wds].pImageInfo = image_infos + num_image_info;
            ++num_image_info;
         }

         wds[num_wds].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
         wds[num_wds].pNext = NULL;
         wds[num_wds].dstBinding = shader->bindings[j].binding;
         wds[num_wds].dstArrayElement = 0;
         wds[num_wds].descriptorCount = 1;
         wds[num_wds].descriptorType = shader->bindings[j].type;
         ++num_wds;
      }
   }

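   /* Barriers and layout transitions have to be recorded outside a render
    * pass, so emit them all before (re)entering one.
    */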
   struct zink_batch *batch;
   if (num_transitions > 0) {
      batch = zink_batch_no_rp(ctx);

      for (int i = 0; i < num_transitions; ++i)
         zink_resource_barrier(batch->cmdbuf, transitions[i],
                               transitions[i]->aspect,
                               VK_IMAGE_LAYOUT_GENERAL);
   }

   if (ctx->xfb_barrier)
      zink_emit_xfb_counter_barrier(ctx);

   if (ctx->dirty_so_targets)
      zink_emit_stream_output_targets(pctx);

   if (so_target && zink_resource(so_target->base.buffer)->needs_xfb_barrier)
      zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer));

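   /* Now we can enter the render pass. If the batch's descriptor pool can't
    * cover this program, flush and start a fresh batch.
    */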
   batch = zink_batch_rp(ctx);

   if (batch->descs_left < gfx_program->num_descriptors) {
      ctx->base.flush(&ctx->base, NULL, 0);
      batch = zink_batch_rp(ctx);
      assert(batch->descs_left >= gfx_program->num_descriptors);
   }

   VkDescriptorSet desc_set = allocate_descriptor_set(screen, batch,
                                                      gfx_program);
   assert(desc_set != VK_NULL_HANDLE);

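   /* Reference every bound resource from the batch so it stays alive until
    * the command buffer finishes executing.
    */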
   for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) {
      struct zink_shader *shader = ctx->gfx_stages[i];
      if (!shader)
         continue;

      for (int j = 0; j < shader->num_bindings; j++) {
         int index = shader->bindings[j].index;
         if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer);
            zink_batch_reference_resoure(batch, res);
         } else {
            struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->image_views[i][index]);
            zink_batch_reference_sampler_view(batch, sampler_view);
         }
      }
   }

   vkCmdSetViewport(batch->cmdbuf, 0, ctx->num_viewports, ctx->viewports);
   if (ctx->rast_state->base.scissor)
      vkCmdSetScissor(batch->cmdbuf, 0, ctx->num_viewports, ctx->scissors);
   else if (ctx->fb_state.width && ctx->fb_state.height) {
      VkRect2D fb_scissor = {};
      fb_scissor.extent.width = ctx->fb_state.width;
      fb_scissor.extent.height = ctx->fb_state.height;
      vkCmdSetScissor(batch->cmdbuf, 0, 1, &fb_scissor);
   }

   if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) {
      if (screen->feats.wideLines || ctx->line_width == 1.0f)
         vkCmdSetLineWidth(batch->cmdbuf, ctx->line_width);
      else
         debug_printf("BUG: wide lines not supported, needs fallback!\n");
   }

   vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, ctx->stencil_ref.ref_value[0]);
   vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_BACK_BIT, ctx->stencil_ref.ref_value[1]);

   if (depth_bias)
      vkCmdSetDepthBias(batch->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
   else
      vkCmdSetDepthBias(batch->cmdbuf, 0.0f, 0.0f, 0.0f);

   if (ctx->gfx_pipeline_state.blend_state->need_blend_constants)
      vkCmdSetBlendConstants(batch->cmdbuf, ctx->blend_constants);

   if (num_wds > 0) {
      for (int i = 0; i < num_wds; ++i)
         wds[i].dstSet = desc_set;
      vkUpdateDescriptorSets(screen->dev, num_wds, wds, 0, NULL);
   }

   vkCmdBindPipeline(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
   vkCmdBindDescriptorSets(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
                           gfx_program->layout, 0, 1, &desc_set, 0, NULL);
   zink_bind_vertex_buffers(batch, ctx);

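   /* Start transform feedback, resuming from each target's saved counter
    * buffer when one is valid; targets without a valid saved counter pass
    * VK_NULL_HANDLE and start counting from scratch.
    */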
   if (ctx->num_so_targets) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         if (t->counter_buffer_valid) {
            struct zink_resource *res = zink_resource(t->counter_buffer);
            zink_batch_reference_resoure(batch, res);
            counter_buffers[i] = res->buffer;
            counter_buffer_offsets[i] = t->counter_buffer_offset;
         } else
            counter_buffers[i] = VK_NULL_HANDLE;
      }
      screen->vk_CmdBeginTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

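   /* 8-bit indices were converted away by u_primconvert above, so only 16-
    * and 32-bit index buffers reach this point.
    */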
   if (dinfo->index_size > 0) {
      assert(dinfo->index_size != 1);
      VkIndexType index_type = dinfo->index_size == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
      struct zink_resource *res = zink_resource(index_buffer);
      vkCmdBindIndexBuffer(batch->cmdbuf, res->buffer, index_offset, index_type);
      zink_batch_reference_resoure(batch, res);
      vkCmdDrawIndexed(batch->cmdbuf,
                       dinfo->count, dinfo->instance_count,
                       dinfo->start, dinfo->index_bias, dinfo->start_instance);
   } else {
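      /* Draws sourced from a stream-output target take their vertex count
       * from the target's counter buffer on the GPU side, when the
       * implementation supports it.
       */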
      if (so_target && screen->tf_props.transformFeedbackDraw) {
         zink_batch_reference_resoure(batch, zink_resource(so_target->counter_buffer));
         screen->vk_CmdDrawIndirectByteCountEXT(batch->cmdbuf, dinfo->instance_count, dinfo->start_instance,
                                                zink_resource(so_target->counter_buffer)->buffer, so_target->counter_buffer_offset, 0,
                                                MIN2(so_target->stride, screen->tf_props.maxTransformFeedbackBufferDataStride));
      } else
         vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
   }

   if (dinfo->index_size > 0 && dinfo->has_user_indices)
      pipe_resource_reference(&index_buffer, NULL);

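   /* End transform feedback and save the counter buffers, so a later resume
    * (or a draw sourced from the feedback output) knows how much was
    * written.
    */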
   if (ctx->num_so_targets) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         counter_buffers[i] = zink_resource(t->counter_buffer)->buffer;
         counter_buffer_offsets[i] = t->counter_buffer_offset;
         t->counter_buffer_valid = true;
         zink_resource(ctx->so_targets[i]->buffer)->needs_xfb_barrier = true;
      }
      screen->vk_CmdEndTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }
}