/* [mesa.git] src/gallium/drivers/zink/zink_draw.c */
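/* Draw-time state emission for zink, the Gallium driver that layers OpenGL on
 * top of Vulkan: descriptor set updates, dynamic state, transform feedback
 * setup and the actual vkCmdDraw*() calls all live here.
 */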
#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_program.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"

#include "indices/u_primconvert.h"
#include "util/hash_table.h"
#include "util/u_debug.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "util/u_prim_restart.h"

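/* Grabs a fresh VkDescriptorSet for this program's layout out of the batch's
 * descriptor pool and charges it against the batch's remaining budget; the
 * caller is expected to have checked descs_left (see the assert below).
 */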
static VkDescriptorSet
allocate_descriptor_set(struct zink_screen *screen,
                        struct zink_batch *batch,
                        struct zink_gfx_program *prog)
{
   assert(batch->descs_left >= prog->num_descriptors);
   VkDescriptorSetAllocateInfo dsai;
   memset((void *)&dsai, 0, sizeof(dsai));
   dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
   dsai.pNext = NULL;
   dsai.descriptorPool = batch->descpool;
   dsai.descriptorSetCount = 1;
   dsai.pSetLayouts = &prog->dsl;

   VkDescriptorSet desc_set;
   if (vkAllocateDescriptorSets(screen->dev, &dsai, &desc_set) != VK_SUCCESS) {
      debug_printf("ZINK: failed to allocate descriptor set :/\n");
      return VK_NULL_HANDLE;
   }

   batch->descs_left -= prog->num_descriptors;
   return desc_set;
}

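/* Emits the buffer memory barrier the spec requires between writing the
 * transform feedback counter buffers (at xfb pause) and reading them back
 * at the draw-indirect stage (resume or vkCmdDrawIndirectByteCountEXT).
 */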
static void
zink_emit_xfb_counter_barrier(struct zink_context *ctx)
{
   /* Between the pause and resume there needs to be a memory barrier for the counter buffers
    * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
    * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
    * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
    * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
    *
    * - from VK_EXT_transform_feedback spec
    */
   VkBufferMemoryBarrier barriers[PIPE_MAX_SO_OUTPUTS] = {};
   unsigned barrier_count = 0;

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
      if (t->counter_buffer_valid) {
         barriers[barrier_count].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
         barriers[barrier_count].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
         barriers[barrier_count].dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
         barriers[barrier_count].buffer = zink_resource(t->counter_buffer)->buffer;
         barriers[barrier_count].size = VK_WHOLE_SIZE;
         barrier_count++;
      }
   }
   struct zink_batch *batch = zink_batch_no_rp(ctx);
   vkCmdPipelineBarrier(batch->cmdbuf,
      VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
      0,
      0, NULL,
      barrier_count, barriers,
      0, NULL
   );
   ctx->xfb_barrier = false;
}

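/* Makes streamout writes to a buffer visible before that same buffer is read
 * back as a vertex attribute source, as required when drawing from a
 * transform feedback buffer.
 */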
static void
zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res)
{
   /* A pipeline barrier is required between using the buffers as
    * transform feedback buffers and vertex buffers to
    * ensure all writes to the transform feedback buffers are visible
    * when the data is read as vertex attributes.
    * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
    * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
    * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
    * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively.
    *
    * - 20.3.1. Drawing Transform Feedback
    */
   VkBufferMemoryBarrier barriers[1] = {};
   barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
   barriers[0].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT;
   barriers[0].dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
   barriers[0].buffer = res->buffer;
   barriers[0].size = VK_WHOLE_SIZE;
   struct zink_batch *batch = zink_batch_no_rp(ctx);
   zink_batch_reference_resoure(batch, res);
   vkCmdPipelineBarrier(batch->cmdbuf,
      VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
      0,
      0, NULL,
      ARRAY_SIZE(barriers), barriers,
      0, NULL
   );
   res->needs_xfb_barrier = false;
}

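/* (Re)binds all currently set stream output targets as transform feedback
 * buffers on the current batch and clears the dirty flag.
 */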
static void
zink_emit_stream_output_targets(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_batch *batch = zink_curr_batch(ctx);
   VkBuffer buffers[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS];

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
      buffers[i] = zink_resource(t->base.buffer)->buffer;
      zink_batch_reference_resoure(batch, zink_resource(t->base.buffer));
      buffer_offsets[i] = t->base.buffer_offset;
      buffer_sizes[i] = t->base.buffer_size;
   }

   screen->vk_CmdBindTransformFeedbackBuffersEXT(batch->cmdbuf, 0, ctx->num_so_targets,
                                                 buffers, buffer_offsets,
                                                 buffer_sizes);
   ctx->dirty_so_targets = false;
}

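/* Binds one VkBuffer per vertex-element binding, substituting the context's
 * dummy buffer for any binding whose pipe_vertex_buffer has no resource.
 */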
static void
zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
{
   VkBuffer buffers[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
   const struct zink_vertex_elements_state *elems = ctx->element_state;
   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->buffers + ctx->element_state->binding_map[i];
      assert(vb);
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         buffers[i] = res->buffer;
         buffer_offsets[i] = vb->buffer_offset;
         zink_batch_reference_resoure(batch, res);
      } else {
         buffers[i] = zink_resource(ctx->dummy_buffer)->buffer;
         buffer_offsets[i] = 0;
      }
   }

   if (elems->hw_state.num_bindings > 0)
      vkCmdBindVertexBuffers(batch->cmdbuf, 0,
                             elems->hw_state.num_bindings,
                             buffers, buffer_offsets);
}

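/* Looks up (or creates and caches) the zink_gfx_program matching the
 * currently bound shader stages; only does work when the program is dirty.
 */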
static struct zink_gfx_program *
get_gfx_program(struct zink_context *ctx)
{
   if (ctx->dirty_program) {
      struct hash_entry *entry = _mesa_hash_table_search(ctx->program_cache,
                                                         ctx->gfx_stages);
      if (!entry) {
         struct zink_gfx_program *prog;
         prog = zink_create_gfx_program(ctx, ctx->gfx_stages);
         if (!prog)
            return NULL;
         entry = _mesa_hash_table_insert(ctx->program_cache, prog->stages, prog);
         if (!entry)
            return NULL;
      }
      ctx->curr_program = entry->data;
      ctx->dirty_program = false;
   }

   assert(ctx->curr_program);
   return ctx->curr_program;
}

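/* vkCmdSetLineWidth only matters when the primitive actually rasterizes as
 * lines: line primitives, or triangles drawn with a line polygon mode.
 */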
static bool
line_width_needed(enum pipe_prim_type reduced_prim,
                  VkPolygonMode polygon_mode)
{
   switch (reduced_prim) {
   case PIPE_PRIM_POINTS:
      return false;

   case PIPE_PRIM_LINES:
      return true;

   case PIPE_PRIM_TRIANGLES:
      return polygon_mode == VK_POLYGON_MODE_LINE;

   default:
      unreachable("unexpected reduced prim");
   }
}

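/* Main draw entry point. Roughly:
 *   1. punt unsupported topologies (quads, line loops, ...) to u_primconvert
 *   2. pick up the gfx program and pipeline for the current state
 *   3. translate/upload the index buffer if needed
 *   4. build VkWriteDescriptorSets for UBOs and sampler views, recording any
 *      image layout transitions that have to happen outside the render pass
 *   5. emit barriers, dynamic state, descriptor updates and buffer bindings
 *   6. issue the indexed, stream-output-sourced or plain draw
 */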
void
zink_draw_vbo(struct pipe_context *pctx,
              const struct pipe_draw_info *dinfo)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_rasterizer_state *rast_state = ctx->rast_state;
   struct zink_so_target *so_target = zink_so_target(dinfo->count_from_stream_output);
   VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {};
   bool need_index_buffer_unref = false;

   if (dinfo->mode >= PIPE_PRIM_QUADS ||
       dinfo->mode == PIPE_PRIM_LINE_LOOP) {
      if (!u_trim_pipe_prim(dinfo->mode, (unsigned *)&dinfo->count))
         return;

      util_primconvert_save_rasterizer_state(ctx->primconvert, &rast_state->base);
      util_primconvert_draw_vbo(ctx->primconvert, dinfo);
      return;
   }

   struct zink_gfx_program *gfx_program = get_gfx_program(ctx);
   if (!gfx_program)
      return;

   /* this is broken for anything requiring primconvert atm */
   ctx->gfx_pipeline_state.primitive_restart = !!dinfo->primitive_restart;

   VkPipeline pipeline = zink_get_gfx_pipeline(screen, gfx_program,
                                               &ctx->gfx_pipeline_state,
                                               dinfo->mode);

   enum pipe_prim_type reduced_prim = u_reduced_prim(dinfo->mode);

   bool depth_bias = false;
   switch (reduced_prim) {
   case PIPE_PRIM_POINTS:
      depth_bias = rast_state->offset_point;
      break;

   case PIPE_PRIM_LINES:
      depth_bias = rast_state->offset_line;
      break;

   case PIPE_PRIM_TRIANGLES:
      depth_bias = rast_state->offset_tri;
      break;

   default:
      unreachable("unexpected reduced prim");
   }

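   /* Pick the index buffer to bind: translate it when primitive restart uses a
    * non-standard restart index or when 8-bit indices aren't supported, upload
    * user-pointer indices, or use the application's buffer directly.
    */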
   unsigned index_offset = 0;
   struct pipe_resource *index_buffer = NULL;
   if (dinfo->index_size > 0) {
      uint32_t restart_index = util_prim_restart_index_from_size(dinfo->index_size);
      if ((dinfo->primitive_restart && (dinfo->restart_index != restart_index)) ||
          (!screen->have_EXT_index_type_uint8 && dinfo->index_size == 1)) {
         util_translate_prim_restart_ib(pctx, dinfo, &index_buffer);
         need_index_buffer_unref = true;
      } else {
         if (dinfo->has_user_indices) {
            if (!util_upload_index_buffer(pctx, dinfo, &index_buffer, &index_offset, 4)) {
               debug_printf("util_upload_index_buffer() failed\n");
               return;
            }
         } else
            index_buffer = dinfo->index.resource;
      }
   }

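   /* Build one VkWriteDescriptorSet per shader binding (UBOs and combined
    * image/samplers); dstSet is filled in later, once the set is allocated.
    */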
   VkWriteDescriptorSet wds[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS + PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
   VkDescriptorBufferInfo buffer_infos[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS];
   VkDescriptorImageInfo image_infos[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
   int num_wds = 0, num_buffer_info = 0, num_image_info = 0;

   struct zink_resource *transitions[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
   int num_transitions = 0;

   for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) {
      struct zink_shader *shader = ctx->gfx_stages[i];
      if (!shader)
         continue;

      if (i == MESA_SHADER_VERTEX && ctx->num_so_targets) {
         for (unsigned j = 0; j < ctx->num_so_targets; j++) {
            struct zink_so_target *t = zink_so_target(ctx->so_targets[j]);
            t->stride = shader->stream_output.stride[j] * sizeof(uint32_t);
         }
      }

      for (int j = 0; j < shader->num_bindings; j++) {
         int index = shader->bindings[j].index;
         if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            assert(ctx->ubos[i][index].buffer_size > 0);
            assert(ctx->ubos[i][index].buffer_size <= screen->props.limits.maxUniformBufferRange);
            assert(ctx->ubos[i][index].buffer);
            struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer);
            buffer_infos[num_buffer_info].buffer = res->buffer;
            buffer_infos[num_buffer_info].offset = ctx->ubos[i][index].buffer_offset;
            buffer_infos[num_buffer_info].range = ctx->ubos[i][index].buffer_size;
            wds[num_wds].pBufferInfo = buffer_infos + num_buffer_info;
            ++num_buffer_info;
         } else {
            struct pipe_sampler_view *psampler_view = ctx->image_views[i][index];
            assert(psampler_view);
            struct zink_sampler_view *sampler_view = zink_sampler_view(psampler_view);

            struct zink_resource *res = zink_resource(psampler_view->texture);
            VkImageLayout layout = res->layout;
            if (layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL &&
                layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
                layout != VK_IMAGE_LAYOUT_GENERAL) {
               transitions[num_transitions++] = res;
               layout = VK_IMAGE_LAYOUT_GENERAL;
            }
            image_infos[num_image_info].imageLayout = layout;
            image_infos[num_image_info].imageView = sampler_view->image_view;
            image_infos[num_image_info].sampler = ctx->samplers[i][index];
            wds[num_wds].pImageInfo = image_infos + num_image_info;
            ++num_image_info;
         }

         wds[num_wds].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
         wds[num_wds].pNext = NULL;
         wds[num_wds].dstBinding = shader->bindings[j].binding;
         wds[num_wds].dstArrayElement = 0;
         wds[num_wds].descriptorCount = 1;
         wds[num_wds].descriptorType = shader->bindings[j].type;
         ++num_wds;
      }
   }

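   /* Sampled images must be transitioned outside the render pass, so do that
    * (and the transform feedback barriers below) before (re)entering it.
    */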
   struct zink_batch *batch;
   if (num_transitions > 0) {
      batch = zink_batch_no_rp(ctx);

      for (int i = 0; i < num_transitions; ++i)
         zink_resource_barrier(batch->cmdbuf, transitions[i],
                               transitions[i]->aspect,
                               VK_IMAGE_LAYOUT_GENERAL);
   }

   if (ctx->xfb_barrier)
      zink_emit_xfb_counter_barrier(ctx);

   if (ctx->dirty_so_targets)
      zink_emit_stream_output_targets(pctx);

   if (so_target && zink_resource(so_target->base.buffer)->needs_xfb_barrier)
      zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer));

   batch = zink_batch_rp(ctx);

   if (batch->descs_left < gfx_program->num_descriptors) {
      ctx->base.flush(&ctx->base, NULL, 0);
      batch = zink_batch_rp(ctx);
      assert(batch->descs_left >= gfx_program->num_descriptors);
   }

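   /* Allocate the descriptor set for this draw and pin every referenced
    * resource (UBOs, sampler views) to the batch so they stay alive for the
    * lifetime of the batch.
    */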
   VkDescriptorSet desc_set = allocate_descriptor_set(screen, batch,
                                                      gfx_program);
   assert(desc_set != VK_NULL_HANDLE);

   for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) {
      struct zink_shader *shader = ctx->gfx_stages[i];
      if (!shader)
         continue;

      for (int j = 0; j < shader->num_bindings; j++) {
         int index = shader->bindings[j].index;
         if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer);
            zink_batch_reference_resoure(batch, res);
         } else {
            struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->image_views[i][index]);
            zink_batch_reference_sampler_view(batch, sampler_view);
         }
      }
   }

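   /* All pipeline state that zink keeps dynamic: viewport, scissor, line
    * width, stencil references, depth bias and blend constants.
    */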
   vkCmdSetViewport(batch->cmdbuf, 0, ctx->num_viewports, ctx->viewports);
   if (ctx->rast_state->base.scissor)
      vkCmdSetScissor(batch->cmdbuf, 0, ctx->num_viewports, ctx->scissors);
   else if (ctx->fb_state.width && ctx->fb_state.height) {
      VkRect2D fb_scissor = {};
      fb_scissor.extent.width = ctx->fb_state.width;
      fb_scissor.extent.height = ctx->fb_state.height;
      vkCmdSetScissor(batch->cmdbuf, 0, 1, &fb_scissor);
   }

   if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) {
      if (screen->feats.wideLines || ctx->line_width == 1.0f)
         vkCmdSetLineWidth(batch->cmdbuf, ctx->line_width);
      else
         debug_printf("BUG: wide lines not supported, needs fallback!\n");
   }

   vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, ctx->stencil_ref.ref_value[0]);
   vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_BACK_BIT, ctx->stencil_ref.ref_value[1]);

   if (depth_bias)
      vkCmdSetDepthBias(batch->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
   else
      vkCmdSetDepthBias(batch->cmdbuf, 0.0f, 0.0f, 0.0f);

   if (ctx->gfx_pipeline_state.blend_state->need_blend_constants)
      vkCmdSetBlendConstants(batch->cmdbuf, ctx->blend_constants);

   if (num_wds > 0) {
      for (int i = 0; i < num_wds; ++i)
         wds[i].dstSet = desc_set;
      vkUpdateDescriptorSets(screen->dev, num_wds, wds, 0, NULL);
   }

   vkCmdBindPipeline(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
   vkCmdBindDescriptorSets(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
                           gfx_program->layout, 0, 1, &desc_set, 0, NULL);
   zink_bind_vertex_buffers(batch, ctx);

   if (ctx->num_so_targets) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         struct zink_resource *res = zink_resource(t->counter_buffer);
         if (t->counter_buffer_valid) {
            zink_batch_reference_resoure(batch, zink_resource(t->counter_buffer));
            counter_buffers[i] = res->buffer;
            counter_buffer_offsets[i] = t->counter_buffer_offset;
         } else
            counter_buffers[i] = VK_NULL_HANDLE;
      }
      screen->vk_CmdBeginTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

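   /* Finally, the draw itself: indexed, sourced from a transform feedback
    * counter via vkCmdDrawIndirectByteCountEXT, or a plain vkCmdDraw.
    */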
   if (dinfo->index_size > 0) {
      VkIndexType index_type;
      unsigned index_size = dinfo->index_size;
      if (need_index_buffer_unref)
         /* index buffer will have been promoted from uint8 to uint16 in this case */
         index_size = MAX2(index_size, 2);
      switch (index_size) {
      case 1:
         assert(screen->have_EXT_index_type_uint8);
         index_type = VK_INDEX_TYPE_UINT8_EXT;
         break;
      case 2:
         index_type = VK_INDEX_TYPE_UINT16;
         break;
      case 4:
         index_type = VK_INDEX_TYPE_UINT32;
         break;
      default:
         unreachable("unknown index size!");
      }
      struct zink_resource *res = zink_resource(index_buffer);
      vkCmdBindIndexBuffer(batch->cmdbuf, res->buffer, index_offset, index_type);
      zink_batch_reference_resoure(batch, res);
      vkCmdDrawIndexed(batch->cmdbuf,
         dinfo->count, dinfo->instance_count,
         need_index_buffer_unref ? 0 : dinfo->start, dinfo->index_bias, dinfo->start_instance);
   } else {
      if (so_target && screen->tf_props.transformFeedbackDraw) {
         zink_batch_reference_resoure(batch, zink_resource(so_target->counter_buffer));
         screen->vk_CmdDrawIndirectByteCountEXT(batch->cmdbuf, dinfo->instance_count, dinfo->start_instance,
                                                zink_resource(so_target->counter_buffer)->buffer, so_target->counter_buffer_offset, 0,
                                                MIN2(so_target->stride, screen->tf_props.maxTransformFeedbackBufferDataStride));
      } else
         vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
   }

   if (dinfo->index_size > 0 && (dinfo->has_user_indices || need_index_buffer_unref))
      pipe_resource_reference(&index_buffer, NULL);

   if (ctx->num_so_targets) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         counter_buffers[i] = zink_resource(t->counter_buffer)->buffer;
         counter_buffer_offsets[i] = t->counter_buffer_offset;
         t->counter_buffer_valid = true;
         zink_resource(ctx->so_targets[i]->buffer)->needs_xfb_barrier = true;
      }
      screen->vk_CmdEndTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }
}