/* src/gallium/drivers/zink/zink_draw.c (from mesa.git) */
#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_program.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"

#include "indices/u_primconvert.h"
#include "util/hash_table.h"
#include "util/u_debug.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "util/u_prim_restart.h"

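/* Carve one descriptor set for the given program out of the batch's
 * descriptor pool. Callers must ensure the pool has enough descriptors
 * left; zink_draw_vbo flushes and retries when it runs low.
 */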
static VkDescriptorSet
allocate_descriptor_set(struct zink_screen *screen,
                        struct zink_batch *batch,
                        struct zink_gfx_program *prog)
{
   assert(batch->descs_left >= prog->num_descriptors);
   VkDescriptorSetAllocateInfo dsai;
   memset(&dsai, 0, sizeof(dsai));
   dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
   dsai.pNext = NULL;
   dsai.descriptorPool = batch->descpool;
   dsai.descriptorSetCount = 1;
   dsai.pSetLayouts = &prog->dsl;

   VkDescriptorSet desc_set;
   if (vkAllocateDescriptorSets(screen->dev, &dsai, &desc_set) != VK_SUCCESS) {
      debug_printf("ZINK: failed to allocate descriptor set :/\n");
      return VK_NULL_HANDLE;
   }

   batch->descs_left -= prog->num_descriptors;
   return desc_set;
}

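/* Emit the barrier the VK_EXT_transform_feedback spec requires between
 * pausing and resuming transform feedback, so the counter values written on
 * pause are visible to the indirect stage that reads them back on resume.
 * Buffer memory barriers can't be recorded inside a render pass, hence
 * zink_batch_no_rp().
 */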
static void
zink_emit_xfb_counter_barrier(struct zink_context *ctx)
{
   /* Between the pause and resume there needs to be a memory barrier for the counter buffers
    * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
    * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
    * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
    * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
    *
    * - from VK_EXT_transform_feedback spec
    */
   VkBufferMemoryBarrier barriers[PIPE_MAX_SO_OUTPUTS] = {};
   unsigned barrier_count = 0;

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
      if (t->counter_buffer_valid) {
         /* pack barriers densely so we never submit zeroed entries when some
          * counter buffers are still invalid */
         barriers[barrier_count].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
         barriers[barrier_count].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
         barriers[barrier_count].dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
         barriers[barrier_count].buffer = zink_resource(t->counter_buffer)->buffer;
         barriers[barrier_count].size = VK_WHOLE_SIZE;
         barrier_count++;
      }
   }
   struct zink_batch *batch = zink_batch_no_rp(ctx);
   vkCmdPipelineBarrier(batch->cmdbuf,
      VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
      0,
      0, NULL,
      barrier_count, barriers,
      0, NULL
   );
   ctx->xfb_barrier = false;
}

static void
zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res)
{
   /* A pipeline barrier is required between using the buffers as
    * transform feedback buffers and vertex buffers to
    * ensure all writes to the transform feedback buffers are visible
    * when the data is read as vertex attributes.
    * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
    * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
    * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
    * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively.
    *
    * - 20.3.1. Drawing Transform Feedback
    */
   VkBufferMemoryBarrier barriers[1] = {};
   barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
   barriers[0].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT;
   barriers[0].dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
   barriers[0].buffer = res->buffer;
   barriers[0].size = VK_WHOLE_SIZE;
   struct zink_batch *batch = zink_batch_no_rp(ctx);
   zink_batch_reference_resoure(batch, res);
   vkCmdPipelineBarrier(batch->cmdbuf,
      VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
      0,
      0, NULL,
      ARRAY_SIZE(barriers), barriers,
      0, NULL
   );
   res->needs_xfb_barrier = false;
}

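/* Bind all active stream-output buffers. This only binds; capture doesn't
 * start until vk_CmdBeginTransformFeedbackEXT is recorded in zink_draw_vbo.
 */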
static void
zink_emit_stream_output_targets(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_batch *batch = zink_curr_batch(ctx);
   VkBuffer buffers[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS];

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
      buffers[i] = zink_resource(t->base.buffer)->buffer;
      zink_batch_reference_resoure(batch, zink_resource(t->base.buffer));
      buffer_offsets[i] = t->base.buffer_offset;
      buffer_sizes[i] = t->base.buffer_size;
   }

   screen->vk_CmdBindTransformFeedbackBuffersEXT(batch->cmdbuf, 0, ctx->num_so_targets,
                                                 buffers, buffer_offsets,
                                                 buffer_sizes);
   ctx->dirty_so_targets = false;
}

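/* Bind one vertex buffer per hardware binding, in binding_map order.
 * Bindings with no backing resource get the context's dummy buffer, since
 * core Vulkan requires a valid buffer handle for every bound slot.
 */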
static void
zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
{
   VkBuffer buffers[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
   const struct zink_vertex_elements_state *elems = ctx->element_state;
   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->buffers + elems->binding_map[i];
      assert(vb);
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         buffers[i] = res->buffer;
         buffer_offsets[i] = vb->buffer_offset;
         zink_batch_reference_resoure(batch, res);
      } else {
         buffers[i] = zink_resource(ctx->dummy_buffer)->buffer;
         buffer_offsets[i] = 0;
      }
   }

   if (elems->hw_state.num_bindings > 0)
      vkCmdBindVertexBuffers(batch->cmdbuf, 0,
                             elems->hw_state.num_bindings,
                             buffers, buffer_offsets);
}

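/* Return the graphics program for the currently bound shader stages,
 * building and caching it on first use. The cache is keyed on the set of
 * zink_shader pointers, so any stage rebind marks the program dirty.
 */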
static struct zink_gfx_program *
get_gfx_program(struct zink_context *ctx)
{
   if (ctx->dirty_program) {
      struct hash_entry *entry = _mesa_hash_table_search(ctx->program_cache,
                                                         ctx->gfx_stages);
      if (!entry) {
         struct zink_gfx_program *prog;
         prog = zink_create_gfx_program(ctx, ctx->gfx_stages);
         if (!prog)
            return NULL;
         entry = _mesa_hash_table_insert(ctx->program_cache, prog->shaders, prog);
         if (!entry)
            return NULL;
      }
      ctx->curr_program = entry->data;
      ctx->dirty_program = false;
   }

   assert(ctx->curr_program);
   return ctx->curr_program;
}

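/* vkCmdSetLineWidth only matters for draws that rasterize lines: line
 * primitives, or triangles drawn with VK_POLYGON_MODE_LINE.
 */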
static bool
line_width_needed(enum pipe_prim_type reduced_prim,
                  VkPolygonMode polygon_mode)
{
   switch (reduced_prim) {
   case PIPE_PRIM_POINTS:
      return false;

   case PIPE_PRIM_LINES:
      return true;

   case PIPE_PRIM_TRIANGLES:
      return polygon_mode == VK_POLYGON_MODE_LINE;

   default:
      unreachable("unexpected reduced prim");
   }
}

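/* Vulkan only defines primitive restart for strip and fan topologies;
 * restart draws with other modes get lowered in zink_draw_vbo via
 * util_draw_vbo_without_prim_restart().
 */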
static inline bool
restart_supported(enum pipe_prim_type mode)
{
   return mode == PIPE_PRIM_LINE_STRIP ||
          mode == PIPE_PRIM_TRIANGLE_STRIP ||
          mode == PIPE_PRIM_TRIANGLE_FAN;
}

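/* Main draw entrypoint: lower unsupported draws, pick a pipeline, gather
 * and upload descriptors, emit the required barriers, set dynamic state,
 * then record the draw, optionally bracketed by transform feedback.
 */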
void
zink_draw_vbo(struct pipe_context *pctx,
              const struct pipe_draw_info *dinfo)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_rasterizer_state *rast_state = ctx->rast_state;
   struct zink_so_target *so_target = zink_so_target(dinfo->count_from_stream_output);
   VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {};
   bool need_index_buffer_unref = false;

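   /* lower draws Vulkan can't express directly: primitive restart on
    * unsupported topologies, and legacy modes like quads and line loops */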
   if (dinfo->primitive_restart && !restart_supported(dinfo->mode)) {
      util_draw_vbo_without_prim_restart(pctx, dinfo);
      return;
   }

   if (dinfo->mode >= PIPE_PRIM_QUADS ||
       dinfo->mode == PIPE_PRIM_LINE_LOOP) {
      if (!u_trim_pipe_prim(dinfo->mode, (unsigned *)&dinfo->count))
         return;

      util_primconvert_save_rasterizer_state(ctx->primconvert, &rast_state->base);
      util_primconvert_draw_vbo(ctx->primconvert, dinfo);
      return;
   }

   struct zink_gfx_program *gfx_program = get_gfx_program(ctx);
   if (!gfx_program)
      return;

   ctx->gfx_pipeline_state.primitive_restart = !!dinfo->primitive_restart;

   VkPipeline pipeline = zink_get_gfx_pipeline(screen, gfx_program,
                                               &ctx->gfx_pipeline_state,
                                               dinfo->mode);

   enum pipe_prim_type reduced_prim = u_reduced_prim(dinfo->mode);

   bool depth_bias = false;
   switch (reduced_prim) {
   case PIPE_PRIM_POINTS:
      depth_bias = rast_state->offset_point;
      break;

   case PIPE_PRIM_LINES:
      depth_bias = rast_state->offset_line;
      break;

   case PIPE_PRIM_TRIANGLES:
      depth_bias = rast_state->offset_tri;
      break;

   default:
      unreachable("unexpected reduced prim");
   }

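   /* choose the index buffer for this draw: rewrite it when the app's
    * restart index doesn't match the fixed all-ones value Vulkan uses, or
    * when 8-bit indices aren't supported; upload user index arrays to a
    * real buffer */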
   unsigned index_offset = 0;
   struct pipe_resource *index_buffer = NULL;
   if (dinfo->index_size > 0) {
      uint32_t restart_index = util_prim_restart_index_from_size(dinfo->index_size);
      if ((dinfo->primitive_restart && (dinfo->restart_index != restart_index)) ||
          (!screen->have_EXT_index_type_uint8 && dinfo->index_size == 1)) {
         util_translate_prim_restart_ib(pctx, dinfo, &index_buffer);
         need_index_buffer_unref = true;
      } else {
         if (dinfo->has_user_indices) {
            if (!util_upload_index_buffer(pctx, dinfo, &index_buffer, &index_offset, 4)) {
               debug_printf("util_upload_index_buffer() failed\n");
               return;
            }
         } else
            index_buffer = dinfo->index.resource;
      }
   }

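   /* gather descriptor writes (UBOs and sampler views) for all bound shader
    * stages up front; sampler textures in an unexpected layout are queued
    * for a transition to VK_IMAGE_LAYOUT_GENERAL below */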
   VkWriteDescriptorSet wds[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS + PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
   VkDescriptorBufferInfo buffer_infos[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS];
   VkDescriptorImageInfo image_infos[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
   int num_wds = 0, num_buffer_info = 0, num_image_info = 0;

   struct zink_resource *transitions[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
   int num_transitions = 0;

   for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) {
      struct zink_shader *shader = ctx->gfx_stages[i];
      if (!shader)
         continue;

      if (i == MESA_SHADER_VERTEX && ctx->num_so_targets) {
         for (unsigned k = 0; k < ctx->num_so_targets; k++) {
            struct zink_so_target *t = zink_so_target(ctx->so_targets[k]);
            t->stride = shader->streamout.so_info.stride[k] * sizeof(uint32_t);
         }
      }

      for (int j = 0; j < shader->num_bindings; j++) {
         int index = shader->bindings[j].index;
         if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            assert(ctx->ubos[i][index].buffer_size > 0);
            assert(ctx->ubos[i][index].buffer_size <= screen->props.limits.maxUniformBufferRange);
            assert(ctx->ubos[i][index].buffer);
            struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer);
            buffer_infos[num_buffer_info].buffer = res->buffer;
            buffer_infos[num_buffer_info].offset = ctx->ubos[i][index].buffer_offset;
            buffer_infos[num_buffer_info].range = ctx->ubos[i][index].buffer_size;
            wds[num_wds].pBufferInfo = buffer_infos + num_buffer_info;
            ++num_buffer_info;
         } else {
            struct pipe_sampler_view *psampler_view = ctx->image_views[i][index];
            assert(psampler_view);
            struct zink_sampler_view *sampler_view = zink_sampler_view(psampler_view);

            struct zink_resource *res = zink_resource(psampler_view->texture);
            VkImageLayout layout = res->layout;
            if (layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL &&
                layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
                layout != VK_IMAGE_LAYOUT_GENERAL) {
               transitions[num_transitions++] = res;
               layout = VK_IMAGE_LAYOUT_GENERAL;
            }
            image_infos[num_image_info].imageLayout = layout;
            image_infos[num_image_info].imageView = sampler_view->image_view;
            image_infos[num_image_info].sampler = ctx->samplers[i][index];
            wds[num_wds].pImageInfo = image_infos + num_image_info;
            ++num_image_info;
         }

         wds[num_wds].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
         wds[num_wds].pNext = NULL;
         wds[num_wds].dstBinding = shader->bindings[j].binding;
         wds[num_wds].dstArrayElement = 0;
         wds[num_wds].descriptorCount = 1;
         wds[num_wds].descriptorType = shader->bindings[j].type;
         ++num_wds;
      }
   }

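   /* image layout transitions cannot be recorded inside a render pass, so
    * grab the no-renderpass batch for them (and for the XFB barriers) */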
   struct zink_batch *batch;
   if (num_transitions > 0) {
      batch = zink_batch_no_rp(ctx);

      for (int i = 0; i < num_transitions; ++i)
         zink_resource_barrier(batch->cmdbuf, transitions[i],
                               transitions[i]->aspect,
                               VK_IMAGE_LAYOUT_GENERAL);
   }

   if (ctx->xfb_barrier)
      zink_emit_xfb_counter_barrier(ctx);

   if (ctx->dirty_so_targets)
      zink_emit_stream_output_targets(pctx);

   if (so_target && zink_resource(so_target->base.buffer)->needs_xfb_barrier)
      zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer));

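   /* (re)enter the render pass, and flush first if the batch's descriptor
    * pool can't fit this program's descriptors anymore */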
   batch = zink_batch_rp(ctx);

   if (batch->descs_left < gfx_program->num_descriptors) {
      ctx->base.flush(&ctx->base, NULL, 0);
      batch = zink_batch_rp(ctx);
      assert(batch->descs_left >= gfx_program->num_descriptors);
   }
   zink_batch_reference_program(batch, ctx->curr_program);

   VkDescriptorSet desc_set = allocate_descriptor_set(screen, batch,
                                                      gfx_program);
   assert(desc_set != VK_NULL_HANDLE);

   for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) {
      struct zink_shader *shader = ctx->gfx_stages[i];
      if (!shader)
         continue;

      for (int j = 0; j < shader->num_bindings; j++) {
         int index = shader->bindings[j].index;
         if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer);
            zink_batch_reference_resoure(batch, res);
         } else {
            struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->image_views[i][index]);
            zink_batch_reference_sampler_view(batch, sampler_view);
         }
      }
   }

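   /* set the dynamic state this pipeline relies on; it is re-emitted on
    * every draw rather than baked into the pipeline object */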
   vkCmdSetViewport(batch->cmdbuf, 0, ctx->num_viewports, ctx->viewports);
   if (ctx->rast_state->base.scissor)
      vkCmdSetScissor(batch->cmdbuf, 0, ctx->num_viewports, ctx->scissors);
   else if (ctx->fb_state.width && ctx->fb_state.height) {
      VkRect2D fb_scissor = {};
      fb_scissor.extent.width = ctx->fb_state.width;
      fb_scissor.extent.height = ctx->fb_state.height;
      vkCmdSetScissor(batch->cmdbuf, 0, 1, &fb_scissor);
   }

   if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) {
      if (screen->feats.wideLines || ctx->line_width == 1.0f)
         vkCmdSetLineWidth(batch->cmdbuf, ctx->line_width);
      else
         debug_printf("BUG: wide lines not supported, needs fallback!\n");
   }

   vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, ctx->stencil_ref.ref_value[0]);
   vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_BACK_BIT, ctx->stencil_ref.ref_value[1]);

   if (depth_bias)
      vkCmdSetDepthBias(batch->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
   else
      vkCmdSetDepthBias(batch->cmdbuf, 0.0f, 0.0f, 0.0f);

   if (ctx->gfx_pipeline_state.blend_state->need_blend_constants)
      vkCmdSetBlendConstants(batch->cmdbuf, ctx->blend_constants);

   if (num_wds > 0) {
      for (int i = 0; i < num_wds; ++i)
         wds[i].dstSet = desc_set;
      vkUpdateDescriptorSets(screen->dev, num_wds, wds, 0, NULL);
   }

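   /* with descriptors, barriers and dynamic state in place, bind the
    * pipeline, its descriptor set and the vertex buffers */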
   vkCmdBindPipeline(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
   vkCmdBindDescriptorSets(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
                           gfx_program->layout, 0, 1, &desc_set, 0, NULL);
   zink_bind_vertex_buffers(batch, ctx);

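   /* begin transform feedback: passing a valid counter buffer resumes
    * capture at the previously recorded vertex count, while VK_NULL_HANDLE
    * starts capture from the start of the bound buffer range */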
   if (ctx->num_so_targets) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         struct zink_resource *res = zink_resource(t->counter_buffer);
         if (t->counter_buffer_valid) {
            zink_batch_reference_resoure(batch, res);
            counter_buffers[i] = res->buffer;
            counter_buffer_offsets[i] = t->counter_buffer_offset;
         } else
            counter_buffers[i] = VK_NULL_HANDLE;
      }
      screen->vk_CmdBeginTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

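   /* indexed path: pick the Vulkan index type matching the (possibly
    * rewritten) index size, bind the buffer, and draw */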
   if (dinfo->index_size > 0) {
      VkIndexType index_type;
      unsigned index_size = dinfo->index_size;
      if (need_index_buffer_unref)
         /* index buffer will have been promoted from uint8 to uint16 in this case */
         index_size = MAX2(index_size, 2);
      switch (index_size) {
      case 1:
         assert(screen->have_EXT_index_type_uint8);
         index_type = VK_INDEX_TYPE_UINT8_EXT;
         break;
      case 2:
         index_type = VK_INDEX_TYPE_UINT16;
         break;
      case 4:
         index_type = VK_INDEX_TYPE_UINT32;
         break;
      default:
         unreachable("unknown index size!");
      }
      struct zink_resource *res = zink_resource(index_buffer);
      vkCmdBindIndexBuffer(batch->cmdbuf, res->buffer, index_offset, index_type);
      zink_batch_reference_resoure(batch, res);
      vkCmdDrawIndexed(batch->cmdbuf,
                       dinfo->count, dinfo->instance_count,
                       need_index_buffer_unref ? 0 : dinfo->start, dinfo->index_bias, dinfo->start_instance);
   } else {
      if (so_target && screen->tf_props.transformFeedbackDraw) {
         zink_batch_reference_resoure(batch, zink_resource(so_target->counter_buffer));
         screen->vk_CmdDrawIndirectByteCountEXT(batch->cmdbuf, dinfo->instance_count, dinfo->start_instance,
                                                zink_resource(so_target->counter_buffer)->buffer, so_target->counter_buffer_offset, 0,
                                                MIN2(so_target->stride, screen->tf_props.maxTransformFeedbackBufferDataStride));
      } else
         vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
   }

   if (dinfo->index_size > 0 && (dinfo->has_user_indices || need_index_buffer_unref))
      pipe_resource_reference(&index_buffer, NULL);

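   /* pause transform feedback, writing the current vertex counts into the
    * counter buffers; mark them valid so the next draw can resume, and flag
    * the buffers as needing a barrier before vertex-input use */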
   if (ctx->num_so_targets) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         counter_buffers[i] = zink_resource(t->counter_buffer)->buffer;
         counter_buffer_offsets[i] = t->counter_buffer_offset;
         t->counter_buffer_valid = true;
         zink_resource(ctx->so_targets[i]->buffer)->needs_xfb_barrier = true;
      }
      screen->vk_CmdEndTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }
}