/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "radv_meta.h"
#include "radv_private.h"
#include "sid.h"

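/* Create the single-subpass render pass used for the in-place decompress and
 * resummarize draws: one D32_SFLOAT_S8_UINT depth/stencil attachment that is
 * loaded and stored for both aspects and stays in
 * DEPTH_STENCIL_ATTACHMENT_OPTIMAL for the whole pass.
 */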
static VkResult
create_pass(struct radv_device *device,
            uint32_t samples,
            VkRenderPass *pass)
{
   VkResult result;
   VkDevice device_h = radv_device_to_handle(device);
   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
   VkAttachmentDescription attachment;

   attachment.flags = 0;
   attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
   attachment.samples = samples;
   attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
   attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
   attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
   attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
   attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
   attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

   result = radv_CreateRenderPass(device_h,
      &(VkRenderPassCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
         .attachmentCount = 1,
         .pAttachments = &attachment,
         .subpassCount = 1,
         .pSubpasses = &(VkSubpassDescription) {
            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
            .inputAttachmentCount = 0,
            .colorAttachmentCount = 0,
            .pColorAttachments = NULL,
            .pResolveAttachments = NULL,
            .pDepthStencilAttachment = &(VkAttachmentReference) {
               .attachment = 0,
               .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
            },
            .preserveAttachmentCount = 0,
            .pPreserveAttachments = NULL,
         },
         .dependencyCount = 0,
      },
      alloc,
      pass);

   return result;
}

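/* The decompress/resummarize pipelines bind no resources, so the pipeline
 * layout is empty: no descriptor set layouts and no push constant ranges.
 */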
static VkResult
create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
{
   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pSetLayouts = NULL,
      .pushConstantRangeCount = 0,
      .pPushConstantRanges = NULL,
   };

   return radv_CreatePipelineLayout(radv_device_to_handle(device),
                                    &pl_create_info,
                                    &device->meta_state.alloc,
                                    layout);
}

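/* Build both the decompress and the resummarize graphics pipelines for the
 * given sample count. The vertex shader generates a full-screen rectangle and
 * the fragment shader is a no-op: the draw only has to touch every pixel,
 * while the actual HTILE work is requested through the
 * radv_graphics_pipeline_create_info flags (db_flush_depth_inplace,
 * db_flush_stencil_inplace and, for the resummarize variant, db_resummarize),
 * which select the corresponding in-place flush/resummarize behaviour of the
 * DB when the pipeline is bound. Creation is guarded by meta_state.mtx so
 * that on-demand creation from two command buffers does not race.
 */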
static VkResult
create_pipeline(struct radv_device *device,
                VkShaderModule vs_module_h,
                uint32_t samples,
                VkRenderPass pass,
                VkPipelineLayout layout,
                VkPipeline *decompress_pipeline,
                VkPipeline *resummarize_pipeline)
{
   VkResult result;
   VkDevice device_h = radv_device_to_handle(device);
   struct radv_shader_module vs_module = {0};

   mtx_lock(&device->meta_state.mtx);
   if (*decompress_pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   if (!vs_module_h) {
      vs_module.nir = radv_meta_build_nir_vs_generate_vertices();
      vs_module_h = radv_shader_module_to_handle(&vs_module);
   }

   struct radv_shader_module fs_module = {
      .nir = radv_meta_build_nir_fs_noop(),
   };

   if (!fs_module.nir) {
      /* XXX: Need more accurate error */
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
      .sampleLocationsEnable = false,
   };

   const VkGraphicsPipelineCreateInfo pipeline_create_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .stageCount = 2,
      .pStages = (VkPipelineShaderStageCreateInfo[]) {
         {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_VERTEX_BIT,
            .module = vs_module_h,
            .pName = "main",
         },
         {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
            .module = radv_shader_module_to_handle(&fs_module),
            .pName = "main",
         },
      },
      .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
         .vertexBindingDescriptionCount = 0,
         .vertexAttributeDescriptionCount = 0,
      },
      .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
         .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
         .primitiveRestartEnable = false,
      },
      .pViewportState = &(VkPipelineViewportStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
         .viewportCount = 1,
         .scissorCount = 1,
      },
      .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
         .depthClampEnable = false,
         .rasterizerDiscardEnable = false,
         .polygonMode = VK_POLYGON_MODE_FILL,
         .cullMode = VK_CULL_MODE_NONE,
         .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
      },
      .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
         .pNext = &sample_locs_create_info,
         .rasterizationSamples = samples,
         .sampleShadingEnable = false,
         .pSampleMask = NULL,
         .alphaToCoverageEnable = false,
         .alphaToOneEnable = false,
      },
      .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
         .logicOpEnable = false,
         .attachmentCount = 0,
         .pAttachments = NULL,
      },
      .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
         .depthTestEnable = false,
         .depthWriteEnable = false,
         .depthBoundsTestEnable = false,
         .stencilTestEnable = false,
      },
      .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
         .dynamicStateCount = 3,
         .pDynamicStates = (VkDynamicState[]) {
            VK_DYNAMIC_STATE_VIEWPORT,
            VK_DYNAMIC_STATE_SCISSOR,
            VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
         },
      },
      .layout = layout,
      .renderPass = pass,
      .subpass = 0,
   };

   result = radv_graphics_pipeline_create(device_h,
      radv_pipeline_cache_to_handle(&device->meta_state.cache),
      &pipeline_create_info,
      &(struct radv_graphics_pipeline_create_info) {
         .use_rectlist = true,
         .db_flush_depth_inplace = true,
         .db_flush_stencil_inplace = true,
      },
      &device->meta_state.alloc,
      decompress_pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;

   result = radv_graphics_pipeline_create(device_h,
      radv_pipeline_cache_to_handle(&device->meta_state.cache),
      &pipeline_create_info,
      &(struct radv_graphics_pipeline_create_info) {
         .use_rectlist = true,
         .db_flush_depth_inplace = true,
         .db_flush_stencil_inplace = true,
         .db_resummarize = true,
      },
      &device->meta_state.alloc,
      resummarize_pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;

   goto cleanup;

cleanup:
   ralloc_free(fs_module.nir);
   if (vs_module.nir)
      ralloc_free(vs_module.nir);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

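/* Destroy the per-sample-count render passes, pipeline layouts and pipelines
 * created by radv_device_init_meta_depth_decomp_state(). The destroy entry
 * points ignore VK_NULL_HANDLE, so entries that were never created (e.g. when
 * pipelines are built on demand) are safe to pass through.
 */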
void
radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
      radv_DestroyRenderPass(radv_device_to_handle(device),
                             state->depth_decomp[i].pass,
                             &state->alloc);
      radv_DestroyPipelineLayout(radv_device_to_handle(device),
                                 state->depth_decomp[i].p_layout,
                                 &state->alloc);
      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->depth_decomp[i].decompress_pipeline,
                           &state->alloc);
      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->depth_decomp[i].resummarize_pipeline,
                           &state->alloc);
   }
}

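/* Create the meta objects for every supported sample count (1 << i for each
 * entry of depth_decomp). Render passes and pipeline layouts are always
 * created up front; the pipelines themselves are skipped when on_demand is
 * set and built lazily from radv_get_depth_pipeline() instead.
 */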
VkResult
radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult res = VK_SUCCESS;

   struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
   if (!vs_module.nir) {
      /* XXX: Need more accurate error */
      res = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);

   for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
      uint32_t samples = 1 << i;

      res = create_pass(device, samples, &state->depth_decomp[i].pass);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_pipeline_layout(device,
                                   &state->depth_decomp[i].p_layout);
      if (res != VK_SUCCESS)
         goto fail;

      if (on_demand)
         continue;

      res = create_pipeline(device, vs_module_h, samples,
                            state->depth_decomp[i].pass,
                            state->depth_decomp[i].p_layout,
                            &state->depth_decomp[i].decompress_pipeline,
                            &state->depth_decomp[i].resummarize_pipeline);
      if (res != VK_SUCCESS)
         goto fail;
   }

   goto cleanup;

fail:
   radv_device_finish_meta_depth_decomp_state(device);

cleanup:
   ralloc_free(vs_module.nir);

   return res;
}

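/* Which HTILE operation the in-place pass performs: expand the compressed
 * depth/stencil data so it can be accessed without HTILE (DEPTH_DECOMPRESS),
 * or rebuild the HTILE metadata from the current depth/stencil contents
 * (DEPTH_RESUMMARIZE).
 */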
enum radv_depth_op {
   DEPTH_DECOMPRESS,
   DEPTH_RESUMMARIZE,
};

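/* Return a pointer to the pipeline matching the image's sample count and the
 * requested operation, creating both pipelines on demand if they do not exist
 * yet. Returns NULL (and records the error on the command buffer) if creation
 * fails.
 */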
static VkPipeline *
radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
                        struct radv_image *image, enum radv_depth_op op)
{
   struct radv_meta_state *state = &cmd_buffer->device->meta_state;
   uint32_t samples = image->info.samples;
   uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   if (!state->depth_decomp[samples_log2].decompress_pipeline) {
      VkResult ret;

      ret = create_pipeline(cmd_buffer->device, VK_NULL_HANDLE, samples,
                            state->depth_decomp[samples_log2].pass,
                            state->depth_decomp[samples_log2].p_layout,
                            &state->depth_decomp[samples_log2].decompress_pipeline,
                            &state->depth_decomp[samples_log2].resummarize_pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return NULL;
      }
   }

   switch (op) {
   case DEPTH_DECOMPRESS:
      pipeline = &state->depth_decomp[samples_log2].decompress_pipeline;
      break;
   case DEPTH_RESUMMARIZE:
      pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
      break;
   default:
      unreachable("unknown operation");
   }

   return pipeline;
}

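/* Run the decompress/resummarize render pass over a single mip level and
 * array layer of the depth image: create a temporary depth-aspect image view
 * and framebuffer, draw one full-size rect (the pipeline is already bound by
 * the caller), then destroy the framebuffer again.
 */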
static void
radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer,
                               struct radv_image *image,
                               const VkImageSubresourceRange *range,
                               int level, int layer)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_meta_state *state = &device->meta_state;
   uint32_t samples_log2 = ffs(image->info.samples) - 1;
   struct radv_image_view iview;
   uint32_t width, height;

   width = radv_minify(image->info.width, range->baseMipLevel + level);
   height = radv_minify(image->info.height, range->baseMipLevel + level);

   radv_image_view_init(&iview, device,
      &(VkImageViewCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = radv_image_to_handle(image),
         .viewType = radv_meta_get_view_type(image),
         .format = image->vk_format,
         .subresourceRange = {
            .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
            .baseMipLevel = range->baseMipLevel + level,
            .levelCount = 1,
            .baseArrayLayer = range->baseArrayLayer + layer,
            .layerCount = 1,
         },
      }, NULL);

   VkFramebuffer fb_h;
   radv_CreateFramebuffer(radv_device_to_handle(device),
      &(VkFramebufferCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .attachmentCount = 1,
         .pAttachments = (VkImageView[]) {
            radv_image_view_to_handle(&iview)
         },
         .width = width,
         .height = height,
         .layers = 1
      }, &cmd_buffer->pool->alloc, &fb_h);

   radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
      &(VkRenderPassBeginInfo) {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
         .renderPass = state->depth_decomp[samples_log2].pass,
         .framebuffer = fb_h,
         .renderArea = {
            .offset = {
               0,
               0,
            },
            .extent = {
               width,
               height,
            }
         },
         .clearValueCount = 0,
         .pClearValues = NULL,
      },
      VK_SUBPASS_CONTENTS_INLINE);

   radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
   radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));

   radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h,
                           &cmd_buffer->pool->alloc);
}

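/* Common path for both entry points below: skip images without HTILE, save
 * the graphics state, bind the decompress or resummarize pipeline, program
 * the sample locations that were supplied for the layout transition (if any),
 * then walk every level and layer of the subresource range and draw over it,
 * restoring the saved state afterwards.
 */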
static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                             struct radv_image *image,
                                             VkImageSubresourceRange *subresourceRange,
                                             struct radv_sample_locations_state *sample_locs,
                                             enum radv_depth_op op)
{
   struct radv_meta_saved_state saved_state;
   VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
   VkPipeline *pipeline;

   if (!radv_image_has_htile(image))
      return;

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_GRAPHICS_PIPELINE |
                  RADV_META_SAVE_SAMPLE_LOCATIONS |
                  RADV_META_SAVE_PASS);

   pipeline = radv_get_depth_pipeline(cmd_buffer, image, op);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
                        VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

   if (sample_locs) {
      assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);

      /* Set the sample locations specified during explicit or
       * automatic layout transitions, otherwise the depth decompress
       * pass uses the default HW locations.
       */
      radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT) {
         .sampleLocationsPerPixel = sample_locs->per_pixel,
         .sampleLocationGridSize = sample_locs->grid_size,
         .sampleLocationsCount = sample_locs->count,
         .pSampleLocations = sample_locs->locations,
      });
   }

   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
      uint32_t width =
         radv_minify(image->info.width,
                     subresourceRange->baseMipLevel + l);
      uint32_t height =
         radv_minify(image->info.height,
                     subresourceRange->baseMipLevel + l);

      radv_CmdSetViewport(cmd_buffer_h, 0, 1,
         &(VkViewport) {
            .x = 0,
            .y = 0,
            .width = width,
            .height = height,
            .minDepth = 0.0f,
            .maxDepth = 1.0f
         });

      radv_CmdSetScissor(cmd_buffer_h, 0, 1,
         &(VkRect2D) {
            .offset = { 0, 0 },
            .extent = { width, height },
         });

      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
         radv_process_depth_image_layer(cmd_buffer, image,
                                        subresourceRange, l, s);
      }
   }

   radv_meta_restore(&saved_state, cmd_buffer);
}

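/* Expand the HTILE-compressed depth/stencil data of the given range in place
 * so it can be accessed without HTILE, e.g. as part of a layout transition.
 * Only valid on the general (graphics) queue.
 */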
void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                         struct radv_image *image,
                                         VkImageSubresourceRange *subresourceRange,
                                         struct radv_sample_locations_state *sample_locs)
{
   assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
   radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
                                    sample_locs, DEPTH_DECOMPRESS);
}

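/* Rebuild the HTILE metadata of the given range in place from the current
 * depth/stencil contents, e.g. when transitioning back to a compressed
 * layout. Only valid on the general (graphics) queue.
 */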
void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                          struct radv_image *image,
                                          VkImageSubresourceRange *subresourceRange,
                                          struct radv_sample_locations_state *sample_locs)
{
   assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
   radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
                                    sample_locs, DEPTH_RESUMMARIZE);
}