radv: Fix threading issue with submission refcounts.
[mesa.git] / src / amd / vulkan / radv_meta_decompress.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26
27 #include "radv_meta.h"
28 #include "radv_private.h"
29 #include "sid.h"
30
/* Operation requested when creating or fetching a depth meta pipeline. */
enum radv_depth_op {
	DEPTH_DECOMPRESS = 0,
	DEPTH_RESUMMARIZE = 1,
};
35
/*
 * Selects which aspects a decompress pipeline disables compression for.
 * The values are used as indices into decompress_pipeline[], so they must
 * stay dense and start at zero.
 */
enum radv_depth_decompress {
	DECOMPRESS_DEPTH_STENCIL = 0,
	DECOMPRESS_DEPTH = 1,
	DECOMPRESS_STENCIL = 2,
};
41
42 static VkResult
43 create_pass(struct radv_device *device,
44 uint32_t samples,
45 VkRenderPass *pass)
46 {
47 VkResult result;
48 VkDevice device_h = radv_device_to_handle(device);
49 const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
50 VkAttachmentDescription attachment;
51
52 attachment.flags = 0;
53 attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
54 attachment.samples = samples;
55 attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
56 attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
57 attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
58 attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
59 attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
60 attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
61
62 result = radv_CreateRenderPass(device_h,
63 &(VkRenderPassCreateInfo) {
64 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
65 .attachmentCount = 1,
66 .pAttachments = &attachment,
67 .subpassCount = 1,
68 .pSubpasses = &(VkSubpassDescription) {
69 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
70 .inputAttachmentCount = 0,
71 .colorAttachmentCount = 0,
72 .pColorAttachments = NULL,
73 .pResolveAttachments = NULL,
74 .pDepthStencilAttachment = &(VkAttachmentReference) {
75 .attachment = 0,
76 .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
77 },
78 .preserveAttachmentCount = 0,
79 .pPreserveAttachments = NULL,
80 },
81 .dependencyCount = 2,
82 .pDependencies = (VkSubpassDependency[]) {
83 {
84 .srcSubpass = VK_SUBPASS_EXTERNAL,
85 .dstSubpass = 0,
86 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
87 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
88 .srcAccessMask = 0,
89 .dstAccessMask = 0,
90 .dependencyFlags = 0
91 },
92 {
93 .srcSubpass = 0,
94 .dstSubpass = VK_SUBPASS_EXTERNAL,
95 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
96 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
97 .srcAccessMask = 0,
98 .dstAccessMask = 0,
99 .dependencyFlags = 0
100 }
101 },
102 },
103 alloc,
104 pass);
105
106 return result;
107 }
108
109 static VkResult
110 create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
111 {
112 VkPipelineLayoutCreateInfo pl_create_info = {
113 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
114 .setLayoutCount = 0,
115 .pSetLayouts = NULL,
116 .pushConstantRangeCount = 0,
117 .pPushConstantRanges = NULL,
118 };
119
120 return radv_CreatePipelineLayout(radv_device_to_handle(device),
121 &pl_create_info,
122 &device->meta_state.alloc,
123 layout);
124 }
125
126 static VkResult
127 create_pipeline(struct radv_device *device,
128 uint32_t samples,
129 VkRenderPass pass,
130 VkPipelineLayout layout,
131 enum radv_depth_op op,
132 enum radv_depth_decompress decompress,
133 VkPipeline *pipeline)
134 {
135 VkResult result;
136 VkDevice device_h = radv_device_to_handle(device);
137
138 mtx_lock(&device->meta_state.mtx);
139 if (*pipeline) {
140 mtx_unlock(&device->meta_state.mtx);
141 return VK_SUCCESS;
142 }
143
144 struct radv_shader_module vs_module = {
145 .nir = radv_meta_build_nir_vs_generate_vertices()
146 };
147
148 if (!vs_module.nir) {
149 /* XXX: Need more accurate error */
150 result = VK_ERROR_OUT_OF_HOST_MEMORY;
151 goto cleanup;
152 }
153
154 struct radv_shader_module fs_module = {
155 .nir = radv_meta_build_nir_fs_noop(),
156 };
157
158 if (!fs_module.nir) {
159 /* XXX: Need more accurate error */
160 result = VK_ERROR_OUT_OF_HOST_MEMORY;
161 goto cleanup;
162 }
163
164 const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
165 .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
166 .sampleLocationsEnable = false,
167 };
168
169 const VkGraphicsPipelineCreateInfo pipeline_create_info = {
170 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
171 .stageCount = 2,
172 .pStages = (VkPipelineShaderStageCreateInfo[]) {
173 {
174 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
175 .stage = VK_SHADER_STAGE_VERTEX_BIT,
176 .module = radv_shader_module_to_handle(&vs_module),
177 .pName = "main",
178 },
179 {
180 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
181 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
182 .module = radv_shader_module_to_handle(&fs_module),
183 .pName = "main",
184 },
185 },
186 .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
187 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
188 .vertexBindingDescriptionCount = 0,
189 .vertexAttributeDescriptionCount = 0,
190 },
191 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
192 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
193 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
194 .primitiveRestartEnable = false,
195 },
196 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
197 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
198 .viewportCount = 1,
199 .scissorCount = 1,
200 },
201 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
202 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
203 .depthClampEnable = false,
204 .rasterizerDiscardEnable = false,
205 .polygonMode = VK_POLYGON_MODE_FILL,
206 .cullMode = VK_CULL_MODE_NONE,
207 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
208 },
209 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
210 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
211 .pNext = &sample_locs_create_info,
212 .rasterizationSamples = samples,
213 .sampleShadingEnable = false,
214 .pSampleMask = NULL,
215 .alphaToCoverageEnable = false,
216 .alphaToOneEnable = false,
217 },
218 .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
219 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
220 .logicOpEnable = false,
221 .attachmentCount = 0,
222 .pAttachments = NULL,
223 },
224 .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
225 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
226 .depthTestEnable = false,
227 .depthWriteEnable = false,
228 .depthBoundsTestEnable = false,
229 .stencilTestEnable = false,
230 },
231 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
232 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
233 .dynamicStateCount = 3,
234 .pDynamicStates = (VkDynamicState[]) {
235 VK_DYNAMIC_STATE_VIEWPORT,
236 VK_DYNAMIC_STATE_SCISSOR,
237 VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
238 },
239 },
240 .layout = layout,
241 .renderPass = pass,
242 .subpass = 0,
243 };
244
245 struct radv_graphics_pipeline_create_info extra = {
246 .use_rectlist = true,
247 .depth_compress_disable = decompress == DECOMPRESS_DEPTH_STENCIL ||
248 decompress == DECOMPRESS_DEPTH,
249 .stencil_compress_disable = decompress == DECOMPRESS_DEPTH_STENCIL ||
250 decompress == DECOMPRESS_STENCIL,
251 .resummarize_enable = op == DEPTH_RESUMMARIZE,
252 };
253
254 result = radv_graphics_pipeline_create(device_h,
255 radv_pipeline_cache_to_handle(&device->meta_state.cache),
256 &pipeline_create_info, &extra,
257 &device->meta_state.alloc,
258 pipeline);
259
260 cleanup:
261 ralloc_free(fs_module.nir);
262 ralloc_free(vs_module.nir);
263 mtx_unlock(&device->meta_state.mtx);
264 return result;
265 }
266
267 void
268 radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
269 {
270 struct radv_meta_state *state = &device->meta_state;
271
272 for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
273 radv_DestroyRenderPass(radv_device_to_handle(device),
274 state->depth_decomp[i].pass,
275 &state->alloc);
276 radv_DestroyPipelineLayout(radv_device_to_handle(device),
277 state->depth_decomp[i].p_layout,
278 &state->alloc);
279
280 for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
281 radv_DestroyPipeline(radv_device_to_handle(device),
282 state->depth_decomp[i].decompress_pipeline[j],
283 &state->alloc);
284 }
285 radv_DestroyPipeline(radv_device_to_handle(device),
286 state->depth_decomp[i].resummarize_pipeline,
287 &state->alloc);
288 }
289 }
290
291 VkResult
292 radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
293 {
294 struct radv_meta_state *state = &device->meta_state;
295 VkResult res = VK_SUCCESS;
296
297 for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
298 uint32_t samples = 1 << i;
299
300 res = create_pass(device, samples, &state->depth_decomp[i].pass);
301 if (res != VK_SUCCESS)
302 goto fail;
303
304 res = create_pipeline_layout(device,
305 &state->depth_decomp[i].p_layout);
306 if (res != VK_SUCCESS)
307 goto fail;
308
309 if (on_demand)
310 continue;
311
312 for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
313 res = create_pipeline(device, samples,
314 state->depth_decomp[i].pass,
315 state->depth_decomp[i].p_layout,
316 DEPTH_DECOMPRESS,
317 j,
318 &state->depth_decomp[i].decompress_pipeline[j]);
319 if (res != VK_SUCCESS)
320 goto fail;
321 }
322
323 res = create_pipeline(device, samples,
324 state->depth_decomp[i].pass,
325 state->depth_decomp[i].p_layout,
326 DEPTH_RESUMMARIZE,
327 0, /* unused */
328 &state->depth_decomp[i].resummarize_pipeline);
329 if (res != VK_SUCCESS)
330 goto fail;
331 }
332
333 return VK_SUCCESS;
334
335 fail:
336 radv_device_finish_meta_depth_decomp_state(device);
337 return res;
338 }
339
340 static VkPipeline *
341 radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
342 struct radv_image *image,
343 const VkImageSubresourceRange *subresourceRange,
344 enum radv_depth_op op)
345 {
346 struct radv_meta_state *state = &cmd_buffer->device->meta_state;
347 uint32_t samples = image->info.samples;
348 uint32_t samples_log2 = ffs(samples) - 1;
349 enum radv_depth_decompress decompress;
350 VkPipeline *pipeline;
351
352 if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
353 decompress = DECOMPRESS_DEPTH;
354 } else if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
355 decompress = DECOMPRESS_STENCIL;
356 } else {
357 decompress = DECOMPRESS_DEPTH_STENCIL;
358 }
359
360 if (!state->depth_decomp[samples_log2].decompress_pipeline[decompress]) {
361 VkResult ret;
362
363 for (uint32_t i = 0; i < NUM_DEPTH_DECOMPRESS_PIPELINES; i++) {
364 ret = create_pipeline(cmd_buffer->device, samples,
365 state->depth_decomp[samples_log2].pass,
366 state->depth_decomp[samples_log2].p_layout,
367 DEPTH_DECOMPRESS,
368 i,
369 &state->depth_decomp[samples_log2].decompress_pipeline[i]);
370 if (ret != VK_SUCCESS) {
371 cmd_buffer->record_result = ret;
372 return NULL;
373 }
374 }
375
376 ret = create_pipeline(cmd_buffer->device, samples,
377 state->depth_decomp[samples_log2].pass,
378 state->depth_decomp[samples_log2].p_layout,
379 DEPTH_RESUMMARIZE,
380 0, /* unused */
381 &state->depth_decomp[samples_log2].resummarize_pipeline);
382 if (ret != VK_SUCCESS) {
383 cmd_buffer->record_result = ret;
384 return NULL;
385 }
386 }
387
388 switch (op) {
389 case DEPTH_DECOMPRESS:
390 pipeline = &state->depth_decomp[samples_log2].decompress_pipeline[decompress];
391 break;
392 case DEPTH_RESUMMARIZE:
393 pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
394 break;
395 default:
396 unreachable("unknown operation");
397 }
398
399 return pipeline;
400 }
401
402 static void
403 radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer,
404 struct radv_image *image,
405 const VkImageSubresourceRange *range,
406 int level, int layer)
407 {
408 struct radv_device *device = cmd_buffer->device;
409 struct radv_meta_state *state = &device->meta_state;
410 uint32_t samples_log2 = ffs(image->info.samples) - 1;
411 struct radv_image_view iview;
412 uint32_t width, height;
413
414 width = radv_minify(image->info.width, range->baseMipLevel + level);
415 height = radv_minify(image->info.height, range->baseMipLevel + level);
416
417 radv_image_view_init(&iview, device,
418 &(VkImageViewCreateInfo) {
419 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
420 .image = radv_image_to_handle(image),
421 .viewType = radv_meta_get_view_type(image),
422 .format = image->vk_format,
423 .subresourceRange = {
424 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
425 .baseMipLevel = range->baseMipLevel + level,
426 .levelCount = 1,
427 .baseArrayLayer = range->baseArrayLayer + layer,
428 .layerCount = 1,
429 },
430 }, NULL);
431
432
433 VkFramebuffer fb_h;
434 radv_CreateFramebuffer(radv_device_to_handle(device),
435 &(VkFramebufferCreateInfo) {
436 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
437 .attachmentCount = 1,
438 .pAttachments = (VkImageView[]) {
439 radv_image_view_to_handle(&iview)
440 },
441 .width = width,
442 .height = height,
443 .layers = 1
444 }, &cmd_buffer->pool->alloc, &fb_h);
445
446 radv_cmd_buffer_begin_render_pass(cmd_buffer,
447 &(VkRenderPassBeginInfo) {
448 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
449 .renderPass = state->depth_decomp[samples_log2].pass,
450 .framebuffer = fb_h,
451 .renderArea = {
452 .offset = {
453 0,
454 0,
455 },
456 .extent = {
457 width,
458 height,
459 }
460 },
461 .clearValueCount = 0,
462 .pClearValues = NULL,
463 });
464 radv_cmd_buffer_set_subpass(cmd_buffer,
465 &cmd_buffer->state.pass->subpasses[0]);
466
467 radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
468 radv_cmd_buffer_end_render_pass(cmd_buffer);
469
470 radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h,
471 &cmd_buffer->pool->alloc);
472 }
473
474 static void radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
475 struct radv_image *image,
476 const VkImageSubresourceRange *subresourceRange,
477 struct radv_sample_locations_state *sample_locs,
478 enum radv_depth_op op)
479 {
480 struct radv_meta_saved_state saved_state;
481 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
482 VkPipeline *pipeline;
483
484 if (!radv_image_has_htile(image))
485 return;
486
487 radv_meta_save(&saved_state, cmd_buffer,
488 RADV_META_SAVE_GRAPHICS_PIPELINE |
489 RADV_META_SAVE_SAMPLE_LOCATIONS |
490 RADV_META_SAVE_PASS);
491
492 pipeline = radv_get_depth_pipeline(cmd_buffer, image,
493 subresourceRange, op);
494
495 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
496 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
497
498 if (sample_locs) {
499 assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
500
501 /* Set the sample locations specified during explicit or
502 * automatic layout transitions, otherwise the depth decompress
503 * pass uses the default HW locations.
504 */
505 radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT) {
506 .sampleLocationsPerPixel = sample_locs->per_pixel,
507 .sampleLocationGridSize = sample_locs->grid_size,
508 .sampleLocationsCount = sample_locs->count,
509 .pSampleLocations = sample_locs->locations,
510 });
511 }
512
513 for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
514 uint32_t width =
515 radv_minify(image->info.width,
516 subresourceRange->baseMipLevel + l);
517 uint32_t height =
518 radv_minify(image->info.height,
519 subresourceRange->baseMipLevel + l);
520
521 radv_CmdSetViewport(cmd_buffer_h, 0, 1,
522 &(VkViewport) {
523 .x = 0,
524 .y = 0,
525 .width = width,
526 .height = height,
527 .minDepth = 0.0f,
528 .maxDepth = 1.0f
529 });
530
531 radv_CmdSetScissor(cmd_buffer_h, 0, 1,
532 &(VkRect2D) {
533 .offset = { 0, 0 },
534 .extent = { width, height },
535 });
536
537 for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
538 radv_process_depth_image_layer(cmd_buffer, image,
539 subresourceRange, l, s);
540 }
541 }
542
543 radv_meta_restore(&saved_state, cmd_buffer);
544 }
545
546 void radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
547 struct radv_image *image,
548 const VkImageSubresourceRange *subresourceRange,
549 struct radv_sample_locations_state *sample_locs)
550 {
551 struct radv_barrier_data barrier = {};
552
553 barrier.layout_transitions.depth_stencil_expand = 1;
554 radv_describe_layout_transition(cmd_buffer, &barrier);
555
556 assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
557 radv_process_depth_stencil(cmd_buffer, image, subresourceRange,
558 sample_locs, DEPTH_DECOMPRESS);
559 }
560
561 void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
562 struct radv_image *image,
563 const VkImageSubresourceRange *subresourceRange,
564 struct radv_sample_locations_state *sample_locs)
565 {
566 struct radv_barrier_data barrier = {};
567
568 barrier.layout_transitions.depth_stencil_resummarize = 1;
569 radv_describe_layout_transition(cmd_buffer, &barrier);
570
571 assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
572 radv_process_depth_stencil(cmd_buffer, image, subresourceRange,
573 sample_locs, DEPTH_RESUMMARIZE);
574 }