radv: rework creation of decompress/resummarize meta pipelines
[mesa.git] / src / amd / vulkan / radv_meta_decompress.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26
27 #include "radv_meta.h"
28 #include "radv_private.h"
29 #include "sid.h"
30
/* Which HTILE meta operation a pipeline performs (selects the DB state
 * programmed via radv_graphics_pipeline_create_info below).
 */
enum radv_depth_op {
	DEPTH_DECOMPRESS,  /* flush/decompress depth+stencil in place */
	DEPTH_RESUMMARIZE, /* rebuild the HTILE summary (db_resummarize) */
};
35
36 static VkResult
37 create_pass(struct radv_device *device,
38 uint32_t samples,
39 VkRenderPass *pass)
40 {
41 VkResult result;
42 VkDevice device_h = radv_device_to_handle(device);
43 const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
44 VkAttachmentDescription attachment;
45
46 attachment.flags = 0;
47 attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
48 attachment.samples = samples;
49 attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
50 attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
51 attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
52 attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
53 attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
54 attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
55
56 result = radv_CreateRenderPass(device_h,
57 &(VkRenderPassCreateInfo) {
58 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
59 .attachmentCount = 1,
60 .pAttachments = &attachment,
61 .subpassCount = 1,
62 .pSubpasses = &(VkSubpassDescription) {
63 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
64 .inputAttachmentCount = 0,
65 .colorAttachmentCount = 0,
66 .pColorAttachments = NULL,
67 .pResolveAttachments = NULL,
68 .pDepthStencilAttachment = &(VkAttachmentReference) {
69 .attachment = 0,
70 .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
71 },
72 .preserveAttachmentCount = 0,
73 .pPreserveAttachments = NULL,
74 },
75 .dependencyCount = 0,
76 },
77 alloc,
78 pass);
79
80 return result;
81 }
82
83 static VkResult
84 create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
85 {
86 VkPipelineLayoutCreateInfo pl_create_info = {
87 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
88 .setLayoutCount = 0,
89 .pSetLayouts = NULL,
90 .pushConstantRangeCount = 0,
91 .pPushConstantRanges = NULL,
92 };
93
94 return radv_CreatePipelineLayout(radv_device_to_handle(device),
95 &pl_create_info,
96 &device->meta_state.alloc,
97 layout);
98 }
99
/* Build one decompress or resummarize graphics pipeline for the given
 * sample count. Thread-safe and idempotent: creation is serialized by the
 * meta-state mutex, and if *pipeline is already non-NULL the function
 * returns VK_SUCCESS without doing anything — callers rely on this for
 * on-demand creation.
 *
 * vs_module_h may be VK_NULL_HANDLE, in which case a temporary
 * vertex-generating shader is built (and freed before returning).
 */
static VkResult
create_pipeline(struct radv_device *device,
		VkShaderModule vs_module_h,
		uint32_t samples,
		VkRenderPass pass,
		VkPipelineLayout layout,
		enum radv_depth_op op,
		VkPipeline *pipeline)
{
	VkResult result;
	VkDevice device_h = radv_device_to_handle(device);
	struct radv_shader_module vs_module = {0};

	mtx_lock(&device->meta_state.mtx);
	if (*pipeline) {
		/* Another thread (or an earlier call) already created it. */
		mtx_unlock(&device->meta_state.mtx);
		return VK_SUCCESS;
	}

	/* No VS supplied: generate the fullscreen vertices in the shader so
	 * no vertex buffers are needed.
	 */
	if (!vs_module_h) {
		vs_module.nir = radv_meta_build_nir_vs_generate_vertices();
		vs_module_h = radv_shader_module_to_handle(&vs_module);
	}

	/* The FS is a no-op: the actual HTILE work is done by the DB state
	 * requested through 'extra' below, not by shading.
	 */
	struct radv_shader_module fs_module = {
		.nir = radv_meta_build_nir_fs_noop(),
	};

	if (!fs_module.nir) {
		/* XXX: Need more accurate error */
		result = VK_ERROR_OUT_OF_HOST_MEMORY;
		goto cleanup;
	}

	/* Chained so VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT below is valid;
	 * locations stay disabled unless set dynamically at record time.
	 */
	const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
		.sampleLocationsEnable = false,
	};

	const VkGraphicsPipelineCreateInfo pipeline_create_info = {
		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
		.stageCount = 2,
		.pStages = (VkPipelineShaderStageCreateInfo[]) {
		       {
				.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				.stage = VK_SHADER_STAGE_VERTEX_BIT,
				.module = vs_module_h,
				.pName = "main",
			},
			{
				.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
				.module = radv_shader_module_to_handle(&fs_module),
				.pName = "main",
			},
		},
		.pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
			.vertexBindingDescriptionCount = 0,
			.vertexAttributeDescriptionCount = 0,
		},
		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
			.primitiveRestartEnable = false,
		},
		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
			.viewportCount = 1,
			.scissorCount = 1,
		},
		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
			.depthClampEnable = false,
			.rasterizerDiscardEnable = false,
			.polygonMode = VK_POLYGON_MODE_FILL,
			.cullMode = VK_CULL_MODE_NONE,
			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
		},
		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
			.pNext = &sample_locs_create_info,
			.rasterizationSamples = samples,
			.sampleShadingEnable = false,
			.pSampleMask = NULL,
			.alphaToCoverageEnable = false,
			.alphaToOneEnable = false,
		},
		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
			.logicOpEnable = false,
			.attachmentCount = 0,
			.pAttachments = NULL,
		},
		/* Depth/stencil tests and writes are all off: the draw only
		 * exists to make the DB walk the HTILE surface.
		 */
		.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
			.depthTestEnable = false,
			.depthWriteEnable = false,
			.depthBoundsTestEnable = false,
			.stencilTestEnable = false,
		},
		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
			.dynamicStateCount = 3,
			.pDynamicStates = (VkDynamicState[]) {
				VK_DYNAMIC_STATE_VIEWPORT,
				VK_DYNAMIC_STATE_SCISSOR,
				VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
			},
		},
		.layout = layout,
		.renderPass = pass,
		.subpass = 0,
	};

	/* radv-specific extras: draw as a rectlist and program the DB to
	 * flush depth/stencil in place; resummarize instead for that op.
	 */
	struct radv_graphics_pipeline_create_info extra = {
		.use_rectlist = true,
		.db_flush_depth_inplace = true,
		.db_flush_stencil_inplace = true,
		.db_resummarize = op == DEPTH_RESUMMARIZE,
	};

	result = radv_graphics_pipeline_create(device_h,
					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
					       &pipeline_create_info, &extra,
					       &device->meta_state.alloc,
					       pipeline);

cleanup:
	/* On the early error path fs_module.nir is NULL; this call relies on
	 * ralloc_free accepting NULL.
	 */
	ralloc_free(fs_module.nir);
	if (vs_module.nir)
		ralloc_free(vs_module.nir);
	mtx_unlock(&device->meta_state.mtx);
	return result;
}
235
236 void
237 radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
238 {
239 struct radv_meta_state *state = &device->meta_state;
240
241 for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
242 radv_DestroyRenderPass(radv_device_to_handle(device),
243 state->depth_decomp[i].pass,
244 &state->alloc);
245 radv_DestroyPipelineLayout(radv_device_to_handle(device),
246 state->depth_decomp[i].p_layout,
247 &state->alloc);
248 radv_DestroyPipeline(radv_device_to_handle(device),
249 state->depth_decomp[i].decompress_pipeline,
250 &state->alloc);
251 radv_DestroyPipeline(radv_device_to_handle(device),
252 state->depth_decomp[i].resummarize_pipeline,
253 &state->alloc);
254 }
255 }
256
/* Initialize the depth decompress/resummarize meta state: one render pass
 * and pipeline layout per supported sample count (1 << i for each slot in
 * depth_decomp[]). When on_demand is set, the pipelines themselves are
 * deferred to first use (see radv_get_depth_pipeline); otherwise both
 * pipelines are created eagerly here.
 *
 * On failure, everything created so far is torn down via
 * radv_device_finish_meta_depth_decomp_state before returning the error.
 */
VkResult
radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
{
	struct radv_meta_state *state = &device->meta_state;
	VkResult res = VK_SUCCESS;

	/* One shared vertex-generating shader for all eager pipelines; freed
	 * unconditionally in the cleanup path below.
	 */
	struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
	if (!vs_module.nir) {
		/* XXX: Need more accurate error */
		res = VK_ERROR_OUT_OF_HOST_MEMORY;
		goto fail;
	}

	VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);

	for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
		uint32_t samples = 1 << i;

		res = create_pass(device, samples, &state->depth_decomp[i].pass);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_pipeline_layout(device,
					     &state->depth_decomp[i].p_layout);
		if (res != VK_SUCCESS)
			goto fail;

		/* Pass/layout are always needed; pipelines can wait. */
		if (on_demand)
			continue;

		res = create_pipeline(device, vs_module_h, samples,
				      state->depth_decomp[i].pass,
				      state->depth_decomp[i].p_layout,
				      DEPTH_DECOMPRESS,
				      &state->depth_decomp[i].decompress_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_pipeline(device, vs_module_h, samples,
				      state->depth_decomp[i].pass,
				      state->depth_decomp[i].p_layout,
				      DEPTH_RESUMMARIZE,
				      &state->depth_decomp[i].resummarize_pipeline);
		if (res != VK_SUCCESS)
			goto fail;
	}

	goto cleanup;

fail:
	radv_device_finish_meta_depth_decomp_state(device);

cleanup:
	ralloc_free(vs_module.nir);

	return res;
}
314
315 static VkPipeline *
316 radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
317 struct radv_image *image, enum radv_depth_op op)
318 {
319 struct radv_meta_state *state = &cmd_buffer->device->meta_state;
320 uint32_t samples = image->info.samples;
321 uint32_t samples_log2 = ffs(samples) - 1;
322 VkPipeline *pipeline;
323
324 if (!state->depth_decomp[samples_log2].decompress_pipeline) {
325 VkResult ret;
326
327 ret = create_pipeline(cmd_buffer->device, VK_NULL_HANDLE, samples,
328 state->depth_decomp[samples_log2].pass,
329 state->depth_decomp[samples_log2].p_layout,
330 DEPTH_DECOMPRESS,
331 &state->depth_decomp[samples_log2].decompress_pipeline);
332 if (ret != VK_SUCCESS) {
333 cmd_buffer->record_result = ret;
334 return NULL;
335 }
336
337 ret = create_pipeline(cmd_buffer->device, VK_NULL_HANDLE, samples,
338 state->depth_decomp[samples_log2].pass,
339 state->depth_decomp[samples_log2].p_layout,
340 DEPTH_RESUMMARIZE,
341 &state->depth_decomp[samples_log2].resummarize_pipeline);
342 if (ret != VK_SUCCESS) {
343 cmd_buffer->record_result = ret;
344 return NULL;
345 }
346 }
347
348 switch (op) {
349 case DEPTH_DECOMPRESS:
350 pipeline = &state->depth_decomp[samples_log2].decompress_pipeline;
351 break;
352 case DEPTH_RESUMMARIZE:
353 pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
354 break;
355 default:
356 unreachable("unknown operation");
357 }
358
359 return pipeline;
360 }
361
/* Record the decompress/resummarize draw for a single mip level and array
 * layer: build a temporary depth image view and framebuffer, run one
 * instance of the meta render pass over the full level extent, then
 * destroy the framebuffer. 'level'/'layer' are offsets relative to
 * range->baseMipLevel / range->baseArrayLayer.
 *
 * The viewport/scissor covering the level are set by the caller
 * (radv_process_depth_image_inplace), not here.
 */
static void
radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer,
			       struct radv_image *image,
			       const VkImageSubresourceRange *range,
			       int level, int layer)
{
	struct radv_device *device = cmd_buffer->device;
	struct radv_meta_state *state = &device->meta_state;
	uint32_t samples_log2 = ffs(image->info.samples) - 1;
	struct radv_image_view iview;
	uint32_t width, height;

	width = radv_minify(image->info.width, range->baseMipLevel + level);
	height = radv_minify(image->info.height, range->baseMipLevel + level);

	radv_image_view_init(&iview, device,
			     &(VkImageViewCreateInfo) {
				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
				     .image = radv_image_to_handle(image),
				     .viewType = radv_meta_get_view_type(image),
				     .format = image->vk_format,
				     .subresourceRange = {
					     .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
					     .baseMipLevel = range->baseMipLevel + level,
					     .levelCount = 1,
					     .baseArrayLayer = range->baseArrayLayer + layer,
					     .layerCount = 1,
				     },
			     }, NULL);


	/* NOTE(review): the framebuffer create result is ignored here —
	 * presumably acceptable for meta paths, but a failure would leave
	 * fb_h uninitialized; verify against other radv meta code.
	 */
	VkFramebuffer fb_h;
	radv_CreateFramebuffer(radv_device_to_handle(device),
			       &(VkFramebufferCreateInfo) {
				       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
				       .attachmentCount = 1,
				       .pAttachments = (VkImageView[]) {
					       radv_image_view_to_handle(&iview)
				       },
				       .width = width,
				       .height = height,
				       .layers = 1
			       }, &cmd_buffer->pool->alloc, &fb_h);

	radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
				&(VkRenderPassBeginInfo) {
					.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
					.renderPass = state->depth_decomp[samples_log2].pass,
					.framebuffer = fb_h,
					.renderArea = {
						.offset = {
							0,
							0,
						},
						.extent = {
							width,
							height,
						}
					},
					.clearValueCount = 0,
					.pClearValues = NULL,
				},
				VK_SUBPASS_CONTENTS_INLINE);

	/* 3 vertices: the meta VS generates them itself, and the pipeline
	 * was created with use_rectlist so they cover the full render area.
	 */
	radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
	radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));

	radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h,
				&cmd_buffer->pool->alloc);
}
432
433 static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
434 struct radv_image *image,
435 const VkImageSubresourceRange *subresourceRange,
436 struct radv_sample_locations_state *sample_locs,
437 enum radv_depth_op op)
438 {
439 struct radv_meta_saved_state saved_state;
440 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
441 VkPipeline *pipeline;
442
443 if (!radv_image_has_htile(image))
444 return;
445
446 radv_meta_save(&saved_state, cmd_buffer,
447 RADV_META_SAVE_GRAPHICS_PIPELINE |
448 RADV_META_SAVE_SAMPLE_LOCATIONS |
449 RADV_META_SAVE_PASS);
450
451 pipeline = radv_get_depth_pipeline(cmd_buffer, image, op);
452
453 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
454 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
455
456 if (sample_locs) {
457 assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
458
459 /* Set the sample locations specified during explicit or
460 * automatic layout transitions, otherwise the depth decompress
461 * pass uses the default HW locations.
462 */
463 radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT) {
464 .sampleLocationsPerPixel = sample_locs->per_pixel,
465 .sampleLocationGridSize = sample_locs->grid_size,
466 .sampleLocationsCount = sample_locs->count,
467 .pSampleLocations = sample_locs->locations,
468 });
469 }
470
471 for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
472 uint32_t width =
473 radv_minify(image->info.width,
474 subresourceRange->baseMipLevel + l);
475 uint32_t height =
476 radv_minify(image->info.height,
477 subresourceRange->baseMipLevel + l);
478
479 radv_CmdSetViewport(cmd_buffer_h, 0, 1,
480 &(VkViewport) {
481 .x = 0,
482 .y = 0,
483 .width = width,
484 .height = height,
485 .minDepth = 0.0f,
486 .maxDepth = 1.0f
487 });
488
489 radv_CmdSetScissor(cmd_buffer_h, 0, 1,
490 &(VkRect2D) {
491 .offset = { 0, 0 },
492 .extent = { width, height },
493 });
494
495 for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
496 radv_process_depth_image_layer(cmd_buffer, image,
497 subresourceRange, l, s);
498 }
499 }
500
501 radv_meta_restore(&saved_state, cmd_buffer);
502 }
503
504 void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
505 struct radv_image *image,
506 const VkImageSubresourceRange *subresourceRange,
507 struct radv_sample_locations_state *sample_locs)
508 {
509 assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
510 radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
511 sample_locs, DEPTH_DECOMPRESS);
512 }
513
514 void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
515 struct radv_image *image,
516 const VkImageSubresourceRange *subresourceRange,
517 struct radv_sample_locations_state *sample_locs)
518 {
519 assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
520 radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
521 sample_locs, DEPTH_RESUMMARIZE);
522 }