glthread: don't prefix variable_data with const
[mesa.git] / src / amd / vulkan / radv_meta_decompress.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26
27 #include "radv_meta.h"
28 #include "radv_private.h"
29 #include "sid.h"
30
/* Operation a depth meta pipeline is built for (see create_pipeline()). */
enum radv_depth_op {
	DEPTH_DECOMPRESS  = 0,
	DEPTH_RESUMMARIZE = 1,
};
35
/*
 * Which aspect(s) a decompress pipeline flushes in place.
 * The values are used as indices into decompress_pipeline[].
 */
enum radv_depth_decompress {
	DECOMPRESS_DEPTH_STENCIL = 0,
	DECOMPRESS_DEPTH         = 1,
	DECOMPRESS_STENCIL       = 2,
};
41
42 static VkResult
43 create_pass(struct radv_device *device,
44 uint32_t samples,
45 VkRenderPass *pass)
46 {
47 VkResult result;
48 VkDevice device_h = radv_device_to_handle(device);
49 const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
50 VkAttachmentDescription attachment;
51
52 attachment.flags = 0;
53 attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
54 attachment.samples = samples;
55 attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
56 attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
57 attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
58 attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
59 attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
60 attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
61
62 result = radv_CreateRenderPass(device_h,
63 &(VkRenderPassCreateInfo) {
64 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
65 .attachmentCount = 1,
66 .pAttachments = &attachment,
67 .subpassCount = 1,
68 .pSubpasses = &(VkSubpassDescription) {
69 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
70 .inputAttachmentCount = 0,
71 .colorAttachmentCount = 0,
72 .pColorAttachments = NULL,
73 .pResolveAttachments = NULL,
74 .pDepthStencilAttachment = &(VkAttachmentReference) {
75 .attachment = 0,
76 .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
77 },
78 .preserveAttachmentCount = 0,
79 .pPreserveAttachments = NULL,
80 },
81 .dependencyCount = 2,
82 .pDependencies = (VkSubpassDependency[]) {
83 {
84 .srcSubpass = VK_SUBPASS_EXTERNAL,
85 .dstSubpass = 0,
86 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
87 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
88 .srcAccessMask = 0,
89 .dstAccessMask = 0,
90 .dependencyFlags = 0
91 },
92 {
93 .srcSubpass = 0,
94 .dstSubpass = VK_SUBPASS_EXTERNAL,
95 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
96 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
97 .srcAccessMask = 0,
98 .dstAccessMask = 0,
99 .dependencyFlags = 0
100 }
101 },
102 },
103 alloc,
104 pass);
105
106 return result;
107 }
108
109 static VkResult
110 create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
111 {
112 VkPipelineLayoutCreateInfo pl_create_info = {
113 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
114 .setLayoutCount = 0,
115 .pSetLayouts = NULL,
116 .pushConstantRangeCount = 0,
117 .pPushConstantRanges = NULL,
118 };
119
120 return radv_CreatePipelineLayout(radv_device_to_handle(device),
121 &pl_create_info,
122 &device->meta_state.alloc,
123 layout);
124 }
125
126 static VkResult
127 create_pipeline(struct radv_device *device,
128 VkShaderModule vs_module_h,
129 uint32_t samples,
130 VkRenderPass pass,
131 VkPipelineLayout layout,
132 enum radv_depth_op op,
133 enum radv_depth_decompress decompress,
134 VkPipeline *pipeline)
135 {
136 VkResult result;
137 VkDevice device_h = radv_device_to_handle(device);
138 struct radv_shader_module vs_module = {0};
139
140 mtx_lock(&device->meta_state.mtx);
141 if (*pipeline) {
142 mtx_unlock(&device->meta_state.mtx);
143 return VK_SUCCESS;
144 }
145
146 if (!vs_module_h) {
147 vs_module.nir = radv_meta_build_nir_vs_generate_vertices();
148 vs_module_h = radv_shader_module_to_handle(&vs_module);
149 }
150
151 struct radv_shader_module fs_module = {
152 .nir = radv_meta_build_nir_fs_noop(),
153 };
154
155 if (!fs_module.nir) {
156 /* XXX: Need more accurate error */
157 result = VK_ERROR_OUT_OF_HOST_MEMORY;
158 goto cleanup;
159 }
160
161 const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
162 .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
163 .sampleLocationsEnable = false,
164 };
165
166 const VkGraphicsPipelineCreateInfo pipeline_create_info = {
167 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
168 .stageCount = 2,
169 .pStages = (VkPipelineShaderStageCreateInfo[]) {
170 {
171 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
172 .stage = VK_SHADER_STAGE_VERTEX_BIT,
173 .module = vs_module_h,
174 .pName = "main",
175 },
176 {
177 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
178 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
179 .module = radv_shader_module_to_handle(&fs_module),
180 .pName = "main",
181 },
182 },
183 .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
184 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
185 .vertexBindingDescriptionCount = 0,
186 .vertexAttributeDescriptionCount = 0,
187 },
188 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
189 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
190 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
191 .primitiveRestartEnable = false,
192 },
193 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
194 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
195 .viewportCount = 1,
196 .scissorCount = 1,
197 },
198 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
199 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
200 .depthClampEnable = false,
201 .rasterizerDiscardEnable = false,
202 .polygonMode = VK_POLYGON_MODE_FILL,
203 .cullMode = VK_CULL_MODE_NONE,
204 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
205 },
206 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
207 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
208 .pNext = &sample_locs_create_info,
209 .rasterizationSamples = samples,
210 .sampleShadingEnable = false,
211 .pSampleMask = NULL,
212 .alphaToCoverageEnable = false,
213 .alphaToOneEnable = false,
214 },
215 .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
216 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
217 .logicOpEnable = false,
218 .attachmentCount = 0,
219 .pAttachments = NULL,
220 },
221 .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
222 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
223 .depthTestEnable = false,
224 .depthWriteEnable = false,
225 .depthBoundsTestEnable = false,
226 .stencilTestEnable = false,
227 },
228 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
229 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
230 .dynamicStateCount = 3,
231 .pDynamicStates = (VkDynamicState[]) {
232 VK_DYNAMIC_STATE_VIEWPORT,
233 VK_DYNAMIC_STATE_SCISSOR,
234 VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
235 },
236 },
237 .layout = layout,
238 .renderPass = pass,
239 .subpass = 0,
240 };
241
242 struct radv_graphics_pipeline_create_info extra = {
243 .use_rectlist = true,
244 .db_flush_depth_inplace = decompress == DECOMPRESS_DEPTH_STENCIL ||
245 decompress == DECOMPRESS_DEPTH,
246 .db_flush_stencil_inplace = decompress == DECOMPRESS_DEPTH_STENCIL ||
247 decompress == DECOMPRESS_STENCIL,
248 .db_resummarize = op == DEPTH_RESUMMARIZE,
249 };
250
251 result = radv_graphics_pipeline_create(device_h,
252 radv_pipeline_cache_to_handle(&device->meta_state.cache),
253 &pipeline_create_info, &extra,
254 &device->meta_state.alloc,
255 pipeline);
256
257 cleanup:
258 ralloc_free(fs_module.nir);
259 if (vs_module.nir)
260 ralloc_free(vs_module.nir);
261 mtx_unlock(&device->meta_state.mtx);
262 return result;
263 }
264
265 void
266 radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
267 {
268 struct radv_meta_state *state = &device->meta_state;
269
270 for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
271 radv_DestroyRenderPass(radv_device_to_handle(device),
272 state->depth_decomp[i].pass,
273 &state->alloc);
274 radv_DestroyPipelineLayout(radv_device_to_handle(device),
275 state->depth_decomp[i].p_layout,
276 &state->alloc);
277
278 for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
279 radv_DestroyPipeline(radv_device_to_handle(device),
280 state->depth_decomp[i].decompress_pipeline[j],
281 &state->alloc);
282 }
283 radv_DestroyPipeline(radv_device_to_handle(device),
284 state->depth_decomp[i].resummarize_pipeline,
285 &state->alloc);
286 }
287 }
288
289 VkResult
290 radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
291 {
292 struct radv_meta_state *state = &device->meta_state;
293 VkResult res = VK_SUCCESS;
294
295 struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
296 if (!vs_module.nir) {
297 /* XXX: Need more accurate error */
298 res = VK_ERROR_OUT_OF_HOST_MEMORY;
299 goto fail;
300 }
301
302 VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
303
304 for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
305 uint32_t samples = 1 << i;
306
307 res = create_pass(device, samples, &state->depth_decomp[i].pass);
308 if (res != VK_SUCCESS)
309 goto fail;
310
311 res = create_pipeline_layout(device,
312 &state->depth_decomp[i].p_layout);
313 if (res != VK_SUCCESS)
314 goto fail;
315
316 if (on_demand)
317 continue;
318
319 for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
320 res = create_pipeline(device, vs_module_h, samples,
321 state->depth_decomp[i].pass,
322 state->depth_decomp[i].p_layout,
323 DEPTH_DECOMPRESS,
324 j,
325 &state->depth_decomp[i].decompress_pipeline[j]);
326 if (res != VK_SUCCESS)
327 goto fail;
328 }
329
330 res = create_pipeline(device, vs_module_h, samples,
331 state->depth_decomp[i].pass,
332 state->depth_decomp[i].p_layout,
333 DEPTH_RESUMMARIZE,
334 0, /* unused */
335 &state->depth_decomp[i].resummarize_pipeline);
336 if (res != VK_SUCCESS)
337 goto fail;
338 }
339
340 goto cleanup;
341
342 fail:
343 radv_device_finish_meta_depth_decomp_state(device);
344
345 cleanup:
346 ralloc_free(vs_module.nir);
347
348 return res;
349 }
350
351 static VkPipeline *
352 radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
353 struct radv_image *image,
354 const VkImageSubresourceRange *subresourceRange,
355 enum radv_depth_op op)
356 {
357 struct radv_meta_state *state = &cmd_buffer->device->meta_state;
358 uint32_t samples = image->info.samples;
359 uint32_t samples_log2 = ffs(samples) - 1;
360 enum radv_depth_decompress decompress;
361 VkPipeline *pipeline;
362
363 if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
364 decompress = DECOMPRESS_DEPTH;
365 } else if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
366 decompress = DECOMPRESS_STENCIL;
367 } else {
368 decompress = DECOMPRESS_DEPTH_STENCIL;
369 }
370
371 if (!state->depth_decomp[samples_log2].decompress_pipeline[decompress]) {
372 VkResult ret;
373
374 for (uint32_t i = 0; i < NUM_DEPTH_DECOMPRESS_PIPELINES; i++) {
375 ret = create_pipeline(cmd_buffer->device, VK_NULL_HANDLE, samples,
376 state->depth_decomp[samples_log2].pass,
377 state->depth_decomp[samples_log2].p_layout,
378 DEPTH_DECOMPRESS,
379 i,
380 &state->depth_decomp[samples_log2].decompress_pipeline[i]);
381 if (ret != VK_SUCCESS) {
382 cmd_buffer->record_result = ret;
383 return NULL;
384 }
385 }
386
387 ret = create_pipeline(cmd_buffer->device, VK_NULL_HANDLE, samples,
388 state->depth_decomp[samples_log2].pass,
389 state->depth_decomp[samples_log2].p_layout,
390 DEPTH_RESUMMARIZE,
391 0, /* unused */
392 &state->depth_decomp[samples_log2].resummarize_pipeline);
393 if (ret != VK_SUCCESS) {
394 cmd_buffer->record_result = ret;
395 return NULL;
396 }
397 }
398
399 switch (op) {
400 case DEPTH_DECOMPRESS:
401 pipeline = &state->depth_decomp[samples_log2].decompress_pipeline[decompress];
402 break;
403 case DEPTH_RESUMMARIZE:
404 pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
405 break;
406 default:
407 unreachable("unknown operation");
408 }
409
410 return pipeline;
411 }
412
413 static void
414 radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer,
415 struct radv_image *image,
416 const VkImageSubresourceRange *range,
417 int level, int layer)
418 {
419 struct radv_device *device = cmd_buffer->device;
420 struct radv_meta_state *state = &device->meta_state;
421 uint32_t samples_log2 = ffs(image->info.samples) - 1;
422 struct radv_image_view iview;
423 uint32_t width, height;
424
425 width = radv_minify(image->info.width, range->baseMipLevel + level);
426 height = radv_minify(image->info.height, range->baseMipLevel + level);
427
428 radv_image_view_init(&iview, device,
429 &(VkImageViewCreateInfo) {
430 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
431 .image = radv_image_to_handle(image),
432 .viewType = radv_meta_get_view_type(image),
433 .format = image->vk_format,
434 .subresourceRange = {
435 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
436 .baseMipLevel = range->baseMipLevel + level,
437 .levelCount = 1,
438 .baseArrayLayer = range->baseArrayLayer + layer,
439 .layerCount = 1,
440 },
441 }, NULL);
442
443
444 VkFramebuffer fb_h;
445 radv_CreateFramebuffer(radv_device_to_handle(device),
446 &(VkFramebufferCreateInfo) {
447 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
448 .attachmentCount = 1,
449 .pAttachments = (VkImageView[]) {
450 radv_image_view_to_handle(&iview)
451 },
452 .width = width,
453 .height = height,
454 .layers = 1
455 }, &cmd_buffer->pool->alloc, &fb_h);
456
457 radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
458 &(VkRenderPassBeginInfo) {
459 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
460 .renderPass = state->depth_decomp[samples_log2].pass,
461 .framebuffer = fb_h,
462 .renderArea = {
463 .offset = {
464 0,
465 0,
466 },
467 .extent = {
468 width,
469 height,
470 }
471 },
472 .clearValueCount = 0,
473 .pClearValues = NULL,
474 },
475 VK_SUBPASS_CONTENTS_INLINE);
476
477 radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
478 radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
479
480 radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h,
481 &cmd_buffer->pool->alloc);
482 }
483
484 static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
485 struct radv_image *image,
486 const VkImageSubresourceRange *subresourceRange,
487 struct radv_sample_locations_state *sample_locs,
488 enum radv_depth_op op)
489 {
490 struct radv_meta_saved_state saved_state;
491 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
492 VkPipeline *pipeline;
493
494 if (!radv_image_has_htile(image))
495 return;
496
497 radv_meta_save(&saved_state, cmd_buffer,
498 RADV_META_SAVE_GRAPHICS_PIPELINE |
499 RADV_META_SAVE_SAMPLE_LOCATIONS |
500 RADV_META_SAVE_PASS);
501
502 pipeline = radv_get_depth_pipeline(cmd_buffer, image,
503 subresourceRange, op);
504
505 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
506 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
507
508 if (sample_locs) {
509 assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
510
511 /* Set the sample locations specified during explicit or
512 * automatic layout transitions, otherwise the depth decompress
513 * pass uses the default HW locations.
514 */
515 radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT) {
516 .sampleLocationsPerPixel = sample_locs->per_pixel,
517 .sampleLocationGridSize = sample_locs->grid_size,
518 .sampleLocationsCount = sample_locs->count,
519 .pSampleLocations = sample_locs->locations,
520 });
521 }
522
523 for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
524 uint32_t width =
525 radv_minify(image->info.width,
526 subresourceRange->baseMipLevel + l);
527 uint32_t height =
528 radv_minify(image->info.height,
529 subresourceRange->baseMipLevel + l);
530
531 radv_CmdSetViewport(cmd_buffer_h, 0, 1,
532 &(VkViewport) {
533 .x = 0,
534 .y = 0,
535 .width = width,
536 .height = height,
537 .minDepth = 0.0f,
538 .maxDepth = 1.0f
539 });
540
541 radv_CmdSetScissor(cmd_buffer_h, 0, 1,
542 &(VkRect2D) {
543 .offset = { 0, 0 },
544 .extent = { width, height },
545 });
546
547 for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
548 radv_process_depth_image_layer(cmd_buffer, image,
549 subresourceRange, l, s);
550 }
551 }
552
553 radv_meta_restore(&saved_state, cmd_buffer);
554 }
555
556 void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
557 struct radv_image *image,
558 const VkImageSubresourceRange *subresourceRange,
559 struct radv_sample_locations_state *sample_locs)
560 {
561 assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
562 radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
563 sample_locs, DEPTH_DECOMPRESS);
564 }
565
566 void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
567 struct radv_image *image,
568 const VkImageSubresourceRange *subresourceRange,
569 struct radv_sample_locations_state *sample_locs)
570 {
571 assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
572 radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
573 sample_locs, DEPTH_RESUMMARIZE);
574 }