src/amd/vulkan/radv_meta_decompress.c
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "radv_meta.h"
#include "radv_private.h"
#include "sid.h"

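/*
 * In-place depth decompression and HTILE resummarization. Both operations
 * draw a full-image rectangle with a no-op fragment shader through a meta
 * render pass; the pipeline's db_flush_depth_inplace/db_flush_stencil_inplace
 * (and, for resummarize, db_resummarize) flags make the DB rewrite the
 * image's HTILE metadata as a side effect of the draw.
 */
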
static VkResult
create_pass(struct radv_device *device,
	    uint32_t samples,
	    VkRenderPass *pass)
{
	VkResult result;
	VkDevice device_h = radv_device_to_handle(device);
	const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
	VkAttachmentDescription attachment;

	/* LOAD/STORE on both aspects: the pass must preserve the existing
	 * depth/stencil contents, since the draw exists only for its DB
	 * side effects.
	 */
	attachment.flags = 0;
	attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
	attachment.samples = samples;
	attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
	attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
	attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
	attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
	attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
	attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

	result = radv_CreateRenderPass(device_h,
				       &(VkRenderPassCreateInfo) {
					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
					       .attachmentCount = 1,
					       .pAttachments = &attachment,
					       .subpassCount = 1,
					       .pSubpasses = &(VkSubpassDescription) {
						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
						       .inputAttachmentCount = 0,
						       .colorAttachmentCount = 0,
						       .pColorAttachments = NULL,
						       .pResolveAttachments = NULL,
						       .pDepthStencilAttachment = &(VkAttachmentReference) {
							       .attachment = 0,
							       .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
						       },
						       .preserveAttachmentCount = 0,
						       .pPreserveAttachments = NULL,
					       },
					       .dependencyCount = 0,
				       },
				       alloc,
				       pass);

	return result;
}

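/* Build the two meta pipelines for a given sample count. They are identical
 * except that the resummarize variant additionally sets db_resummarize; the
 * fragment shader is a no-op, so the draw itself writes nothing.
 */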
static VkResult
create_pipeline(struct radv_device *device,
		VkShaderModule vs_module_h,
		uint32_t samples,
		VkRenderPass pass,
		VkPipeline *decompress_pipeline,
		VkPipeline *resummarize_pipeline)
{
	VkResult result;
	VkDevice device_h = radv_device_to_handle(device);

	struct radv_shader_module fs_module = {
		.nir = radv_meta_build_nir_fs_noop(),
	};

	if (!fs_module.nir) {
		/* XXX: Need more accurate error */
		result = VK_ERROR_OUT_OF_HOST_MEMORY;
		goto cleanup;
	}

	const VkGraphicsPipelineCreateInfo pipeline_create_info = {
		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
		.stageCount = 2,
		.pStages = (VkPipelineShaderStageCreateInfo[]) {
			{
				.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				.stage = VK_SHADER_STAGE_VERTEX_BIT,
				.module = vs_module_h,
				.pName = "main",
			},
			{
				.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
				.module = radv_shader_module_to_handle(&fs_module),
				.pName = "main",
			},
		},
		.pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
			.vertexBindingDescriptionCount = 0,
			.vertexAttributeDescriptionCount = 0,
		},
		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
			.primitiveRestartEnable = false,
		},
		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
			.viewportCount = 1,
			.scissorCount = 1,
		},
		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
			.depthClampEnable = false,
			.rasterizerDiscardEnable = false,
			.polygonMode = VK_POLYGON_MODE_FILL,
			.cullMode = VK_CULL_MODE_NONE,
			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
		},
		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
			.rasterizationSamples = samples,
			.sampleShadingEnable = false,
			.pSampleMask = NULL,
			.alphaToCoverageEnable = false,
			.alphaToOneEnable = false,
		},
		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
			.logicOpEnable = false,
			.attachmentCount = 0,
			.pAttachments = NULL,
		},
		.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
			.depthTestEnable = false,
			.depthWriteEnable = false,
			.depthBoundsTestEnable = false,
			.stencilTestEnable = false,
		},
		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
			.dynamicStateCount = 2,
			.pDynamicStates = (VkDynamicState[]) {
				VK_DYNAMIC_STATE_VIEWPORT,
				VK_DYNAMIC_STATE_SCISSOR,
			},
		},
		.renderPass = pass,
		.subpass = 0,
	};

	result = radv_graphics_pipeline_create(device_h,
					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
					       &pipeline_create_info,
					       &(struct radv_graphics_pipeline_create_info) {
						       .use_rectlist = true,
						       .db_flush_depth_inplace = true,
						       .db_flush_stencil_inplace = true,
					       },
					       &device->meta_state.alloc,
					       decompress_pipeline);
	if (result != VK_SUCCESS)
		goto cleanup;

	result = radv_graphics_pipeline_create(device_h,
					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
					       &pipeline_create_info,
					       &(struct radv_graphics_pipeline_create_info) {
						       .use_rectlist = true,
						       .db_flush_depth_inplace = true,
						       .db_flush_stencil_inplace = true,
						       .db_resummarize = true,
					       },
					       &device->meta_state.alloc,
					       resummarize_pipeline);

	/* Success and failure both fall through to the shared cleanup. */
cleanup:
	ralloc_free(fs_module.nir);
	return result;
}

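/* Destroy the per-sample-count passes and pipelines. Safe to call on a
 * partially initialized state, since NULL handles are skipped.
 */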
void
radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;
	VkDevice device_h = radv_device_to_handle(device);
	const VkAllocationCallbacks *alloc = &device->meta_state.alloc;

	for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
		VkRenderPass pass_h = state->depth_decomp[i].pass;
		if (pass_h) {
			radv_DestroyRenderPass(device_h, pass_h, alloc);
		}
		VkPipeline pipeline_h = state->depth_decomp[i].decompress_pipeline;
		if (pipeline_h) {
			radv_DestroyPipeline(device_h, pipeline_h, alloc);
		}
		pipeline_h = state->depth_decomp[i].resummarize_pipeline;
		if (pipeline_h) {
			radv_DestroyPipeline(device_h, pipeline_h, alloc);
		}
	}
}

VkResult
radv_device_init_meta_depth_decomp_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;
	VkResult res = VK_SUCCESS;

	zero(state->depth_decomp);

	struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
	if (!vs_module.nir) {
		/* XXX: Need more accurate error */
		res = VK_ERROR_OUT_OF_HOST_MEMORY;
		goto fail;
	}

	VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);

	/* One pass/pipeline set per supported sample count: entry i handles
	 * 2^i samples.
	 */
	for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
		uint32_t samples = 1 << i;

		res = create_pass(device, samples, &state->depth_decomp[i].pass);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_pipeline(device, vs_module_h, samples,
				      state->depth_decomp[i].pass,
				      &state->depth_decomp[i].decompress_pipeline,
				      &state->depth_decomp[i].resummarize_pipeline);
		if (res != VK_SUCCESS)
			goto fail;
	}

	goto cleanup;

fail:
	radv_device_finish_meta_depth_decomp_state(device);

cleanup:
	ralloc_free(vs_module.nir);

	return res;
}

static void
emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
		  const VkOffset2D *dest_offset,
		  const VkExtent2D *depth_decomp_extent,
		  VkPipeline pipeline_h)
{
	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);

	RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h);

	if (cmd_buffer->state.pipeline != pipeline) {
		radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
				     pipeline_h);
	}

	radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) {
		.x = dest_offset->x,
		.y = dest_offset->y,
		.width = depth_decomp_extent->width,
		.height = depth_decomp_extent->height,
		.minDepth = 0.0f,
		.maxDepth = 1.0f
	});

	radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) {
		.offset = *dest_offset,
		.extent = *depth_decomp_extent,
	});

	/* Three vertices: the pipeline uses the rectlist primitive, so this
	 * covers the full viewport with a single rectangle.
	 */
	radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
}

enum radv_depth_op {
	DEPTH_DECOMPRESS,
	DEPTH_RESUMMARIZE,
};

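/* Run the requested HTILE operation over every layer of the selected mip
 * level by rendering one full-image rectangle per layer. Images without
 * HTILE need no processing and are skipped.
 */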
static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
					     struct radv_image *image,
					     VkImageSubresourceRange *subresourceRange,
					     enum radv_depth_op op)
{
	struct radv_meta_saved_state saved_state;
	struct radv_meta_saved_pass_state saved_pass_state;
	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
	uint32_t width = radv_minify(image->info.width,
				     subresourceRange->baseMipLevel);
	uint32_t height = radv_minify(image->info.height,
				      subresourceRange->baseMipLevel);
	uint32_t samples = image->info.samples;
	uint32_t samples_log2 = ffs(samples) - 1;
	struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;

	/* Nothing to do for images without HTILE metadata. */
	if (!image->surface.htile_size)
		return;

	radv_meta_save_pass(&saved_pass_state, cmd_buffer);

	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

	for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) {
		struct radv_image_view iview;

		radv_image_view_init(&iview, cmd_buffer->device,
				     &(VkImageViewCreateInfo) {
					     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
					     .image = radv_image_to_handle(image),
					     .format = image->vk_format,
					     .subresourceRange = {
						     .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
						     .baseMipLevel = subresourceRange->baseMipLevel,
						     .levelCount = 1,
						     .baseArrayLayer = subresourceRange->baseArrayLayer + layer,
						     .layerCount = 1,
					     },
				     });

		VkFramebuffer fb_h;
		radv_CreateFramebuffer(device_h,
				       &(VkFramebufferCreateInfo) {
					       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
					       .attachmentCount = 1,
					       .pAttachments = (VkImageView[]) {
						       radv_image_view_to_handle(&iview)
					       },
					       .width = width,
					       .height = height,
					       .layers = 1
				       },
				       &cmd_buffer->pool->alloc,
				       &fb_h);

		radv_CmdBeginRenderPass(cmd_buffer_h,
					&(VkRenderPassBeginInfo) {
						.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
						.renderPass = meta_state->depth_decomp[samples_log2].pass,
						.framebuffer = fb_h,
						.renderArea = {
							.offset = {
								0,
								0,
							},
							.extent = {
								width,
								height,
							}
						},
						.clearValueCount = 0,
						.pClearValues = NULL,
					},
					VK_SUBPASS_CONTENTS_INLINE);

		VkPipeline pipeline_h;
		switch (op) {
		case DEPTH_DECOMPRESS:
			pipeline_h = meta_state->depth_decomp[samples_log2].decompress_pipeline;
			break;
		case DEPTH_RESUMMARIZE:
			pipeline_h = meta_state->depth_decomp[samples_log2].resummarize_pipeline;
			break;
		default:
			unreachable("unknown operation");
		}

		emit_depth_decomp(cmd_buffer, &(VkOffset2D){0, 0}, &(VkExtent2D){width, height}, pipeline_h);
		radv_CmdEndRenderPass(cmd_buffer_h);

		radv_DestroyFramebuffer(device_h, fb_h,
					&cmd_buffer->pool->alloc);
	}

	radv_meta_restore(&saved_state, cmd_buffer);
	radv_meta_restore_pass(&saved_pass_state, cmd_buffer);
}

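/* Public entry points (e.g. for image layout transition handling). Callers
 * must be on the general queue, since both operations are implemented with
 * graphics draws.
 */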
void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
					 struct radv_image *image,
					 VkImageSubresourceRange *subresourceRange)
{
	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_DECOMPRESS);
}

void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
					  struct radv_image *image,
					  VkImageSubresourceRange *subresourceRange)
{
	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_RESUMMARIZE);
}