radv: Add extra struct to image view creation.
[mesa.git] / src / amd / vulkan / radv_meta_clear.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "radv_debug.h"
25 #include "radv_meta.h"
26 #include "radv_private.h"
27 #include "nir/nir_builder.h"
28
29 #include "util/format_rgb9e5.h"
30 #include "vk_format.h"
31
/* Variants of the depth/stencil clear pipelines. The values are used as
 * indices into the per-sample-count depth/stencil pipeline arrays.
 */
enum {
	DEPTH_CLEAR_SLOW = 0,
	DEPTH_CLEAR_FAST_EXPCLEAR = 1,
	DEPTH_CLEAR_FAST_NO_EXPCLEAR = 2,
};
37
38 static void
39 build_color_shaders(struct nir_shader **out_vs,
40 struct nir_shader **out_fs,
41 uint32_t frag_output)
42 {
43 nir_builder vs_b;
44 nir_builder fs_b;
45
46 nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
47 nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
48
49 vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
50 fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
51
52 const struct glsl_type *position_type = glsl_vec4_type();
53 const struct glsl_type *color_type = glsl_vec4_type();
54
55 nir_variable *vs_out_pos =
56 nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
57 "gl_Position");
58 vs_out_pos->data.location = VARYING_SLOT_POS;
59
60 nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(fs_b.shader, nir_intrinsic_load_push_constant);
61 nir_intrinsic_set_base(in_color_load, 0);
62 nir_intrinsic_set_range(in_color_load, 16);
63 in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&fs_b, 0));
64 in_color_load->num_components = 4;
65 nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 4, 32, "clear color");
66 nir_builder_instr_insert(&fs_b, &in_color_load->instr);
67
68 nir_variable *fs_out_color =
69 nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
70 "f_color");
71 fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
72
73 nir_store_var(&fs_b, fs_out_color, &in_color_load->dest.ssa, 0xf);
74
75 nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
76 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
77
78 const struct glsl_type *layer_type = glsl_int_type();
79 nir_variable *vs_out_layer =
80 nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
81 "v_layer");
82 vs_out_layer->data.location = VARYING_SLOT_LAYER;
83 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
84 nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
85 nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
86
87 nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
88 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
89
90 *out_vs = vs_b.shader;
91 *out_fs = fs_b.shader;
92 }
93
94 static VkResult
95 create_pipeline(struct radv_device *device,
96 struct radv_render_pass *render_pass,
97 uint32_t samples,
98 struct nir_shader *vs_nir,
99 struct nir_shader *fs_nir,
100 const VkPipelineVertexInputStateCreateInfo *vi_state,
101 const VkPipelineDepthStencilStateCreateInfo *ds_state,
102 const VkPipelineColorBlendStateCreateInfo *cb_state,
103 const VkPipelineLayout layout,
104 const struct radv_graphics_pipeline_create_info *extra,
105 const VkAllocationCallbacks *alloc,
106 VkPipeline *pipeline)
107 {
108 VkDevice device_h = radv_device_to_handle(device);
109 VkResult result;
110
111 struct radv_shader_module vs_m = { .nir = vs_nir };
112 struct radv_shader_module fs_m = { .nir = fs_nir };
113
114 result = radv_graphics_pipeline_create(device_h,
115 radv_pipeline_cache_to_handle(&device->meta_state.cache),
116 &(VkGraphicsPipelineCreateInfo) {
117 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
118 .stageCount = fs_nir ? 2 : 1,
119 .pStages = (VkPipelineShaderStageCreateInfo[]) {
120 {
121 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
122 .stage = VK_SHADER_STAGE_VERTEX_BIT,
123 .module = radv_shader_module_to_handle(&vs_m),
124 .pName = "main",
125 },
126 {
127 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
128 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
129 .module = radv_shader_module_to_handle(&fs_m),
130 .pName = "main",
131 },
132 },
133 .pVertexInputState = vi_state,
134 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
135 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
136 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
137 .primitiveRestartEnable = false,
138 },
139 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
140 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
141 .viewportCount = 1,
142 .scissorCount = 1,
143 },
144 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
145 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
146 .rasterizerDiscardEnable = false,
147 .polygonMode = VK_POLYGON_MODE_FILL,
148 .cullMode = VK_CULL_MODE_NONE,
149 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
150 .depthBiasEnable = false,
151 },
152 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
153 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
154 .rasterizationSamples = samples,
155 .sampleShadingEnable = false,
156 .pSampleMask = NULL,
157 .alphaToCoverageEnable = false,
158 .alphaToOneEnable = false,
159 },
160 .pDepthStencilState = ds_state,
161 .pColorBlendState = cb_state,
162 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
163 /* The meta clear pipeline declares all state as dynamic.
164 * As a consequence, vkCmdBindPipeline writes no dynamic state
165 * to the cmd buffer. Therefore, at the end of the meta clear,
166 * we need only restore dynamic state was vkCmdSet.
167 */
168 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
169 .dynamicStateCount = 8,
170 .pDynamicStates = (VkDynamicState[]) {
171 /* Everything except stencil write mask */
172 VK_DYNAMIC_STATE_VIEWPORT,
173 VK_DYNAMIC_STATE_SCISSOR,
174 VK_DYNAMIC_STATE_LINE_WIDTH,
175 VK_DYNAMIC_STATE_DEPTH_BIAS,
176 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
177 VK_DYNAMIC_STATE_DEPTH_BOUNDS,
178 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
179 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
180 },
181 },
182 .layout = layout,
183 .flags = 0,
184 .renderPass = radv_render_pass_to_handle(render_pass),
185 .subpass = 0,
186 },
187 extra,
188 alloc,
189 pipeline);
190
191 ralloc_free(vs_nir);
192 ralloc_free(fs_nir);
193
194 return result;
195 }
196
197 static VkResult
198 create_color_renderpass(struct radv_device *device,
199 VkFormat vk_format,
200 uint32_t samples,
201 VkRenderPass *pass)
202 {
203 mtx_lock(&device->meta_state.mtx);
204 if (*pass) {
205 mtx_unlock (&device->meta_state.mtx);
206 return VK_SUCCESS;
207 }
208
209 VkResult result = radv_CreateRenderPass(radv_device_to_handle(device),
210 &(VkRenderPassCreateInfo) {
211 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
212 .attachmentCount = 1,
213 .pAttachments = &(VkAttachmentDescription) {
214 .format = vk_format,
215 .samples = samples,
216 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
217 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
218 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
219 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
220 },
221 .subpassCount = 1,
222 .pSubpasses = &(VkSubpassDescription) {
223 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
224 .inputAttachmentCount = 0,
225 .colorAttachmentCount = 1,
226 .pColorAttachments = &(VkAttachmentReference) {
227 .attachment = 0,
228 .layout = VK_IMAGE_LAYOUT_GENERAL,
229 },
230 .pResolveAttachments = NULL,
231 .pDepthStencilAttachment = &(VkAttachmentReference) {
232 .attachment = VK_ATTACHMENT_UNUSED,
233 .layout = VK_IMAGE_LAYOUT_GENERAL,
234 },
235 .preserveAttachmentCount = 0,
236 .pPreserveAttachments = NULL,
237 },
238 .dependencyCount = 0,
239 }, &device->meta_state.alloc, pass);
240 mtx_unlock(&device->meta_state.mtx);
241 return result;
242 }
243
244 static VkResult
245 create_color_pipeline(struct radv_device *device,
246 uint32_t samples,
247 uint32_t frag_output,
248 VkPipeline *pipeline,
249 VkRenderPass pass)
250 {
251 struct nir_shader *vs_nir;
252 struct nir_shader *fs_nir;
253 VkResult result;
254
255 mtx_lock(&device->meta_state.mtx);
256 if (*pipeline) {
257 mtx_unlock(&device->meta_state.mtx);
258 return VK_SUCCESS;
259 }
260
261 build_color_shaders(&vs_nir, &fs_nir, frag_output);
262
263 const VkPipelineVertexInputStateCreateInfo vi_state = {
264 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
265 .vertexBindingDescriptionCount = 0,
266 .vertexAttributeDescriptionCount = 0,
267 };
268
269 const VkPipelineDepthStencilStateCreateInfo ds_state = {
270 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
271 .depthTestEnable = false,
272 .depthWriteEnable = false,
273 .depthBoundsTestEnable = false,
274 .stencilTestEnable = false,
275 };
276
277 VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 };
278 blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) {
279 .blendEnable = false,
280 .colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
281 VK_COLOR_COMPONENT_R_BIT |
282 VK_COLOR_COMPONENT_G_BIT |
283 VK_COLOR_COMPONENT_B_BIT,
284 };
285
286 const VkPipelineColorBlendStateCreateInfo cb_state = {
287 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
288 .logicOpEnable = false,
289 .attachmentCount = MAX_RTS,
290 .pAttachments = blend_attachment_state
291 };
292
293
294 struct radv_graphics_pipeline_create_info extra = {
295 .use_rectlist = true,
296 };
297 result = create_pipeline(device, radv_render_pass_from_handle(pass),
298 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
299 device->meta_state.clear_color_p_layout,
300 &extra, &device->meta_state.alloc, pipeline);
301
302 mtx_unlock(&device->meta_state.mtx);
303 return result;
304 }
305
306 static void
307 finish_meta_clear_htile_mask_state(struct radv_device *device)
308 {
309 struct radv_meta_state *state = &device->meta_state;
310
311 radv_DestroyPipeline(radv_device_to_handle(device),
312 state->clear_htile_mask_pipeline,
313 &state->alloc);
314 radv_DestroyPipelineLayout(radv_device_to_handle(device),
315 state->clear_htile_mask_p_layout,
316 &state->alloc);
317 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
318 state->clear_htile_mask_ds_layout,
319 &state->alloc);
320 }
321
322 void
323 radv_device_finish_meta_clear_state(struct radv_device *device)
324 {
325 struct radv_meta_state *state = &device->meta_state;
326
327 for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
328 for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
329 radv_DestroyPipeline(radv_device_to_handle(device),
330 state->clear[i].color_pipelines[j],
331 &state->alloc);
332 radv_DestroyRenderPass(radv_device_to_handle(device),
333 state->clear[i].render_pass[j],
334 &state->alloc);
335 }
336
337 for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
338 radv_DestroyPipeline(radv_device_to_handle(device),
339 state->clear[i].depth_only_pipeline[j],
340 &state->alloc);
341 radv_DestroyPipeline(radv_device_to_handle(device),
342 state->clear[i].stencil_only_pipeline[j],
343 &state->alloc);
344 radv_DestroyPipeline(radv_device_to_handle(device),
345 state->clear[i].depthstencil_pipeline[j],
346 &state->alloc);
347 }
348 radv_DestroyRenderPass(radv_device_to_handle(device),
349 state->clear[i].depthstencil_rp,
350 &state->alloc);
351 }
352 radv_DestroyPipelineLayout(radv_device_to_handle(device),
353 state->clear_color_p_layout,
354 &state->alloc);
355 radv_DestroyPipelineLayout(radv_device_to_handle(device),
356 state->clear_depth_p_layout,
357 &state->alloc);
358
359 finish_meta_clear_htile_mask_state(device);
360 }
361
362 static void
363 emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
364 const VkClearAttachment *clear_att,
365 const VkClearRect *clear_rect,
366 uint32_t view_mask)
367 {
368 struct radv_device *device = cmd_buffer->device;
369 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
370 const uint32_t subpass_att = clear_att->colorAttachment;
371 const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
372 const struct radv_image_view *iview = cmd_buffer->state.attachments ?
373 cmd_buffer->state.attachments[pass_att].iview : NULL;
374 uint32_t samples, samples_log2;
375 VkFormat format;
376 unsigned fs_key;
377 VkClearColorValue clear_value = clear_att->clearValue.color;
378 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
379 VkPipeline pipeline;
380
381 /* When a framebuffer is bound to the current command buffer, get the
382 * number of samples from it. Otherwise, get the number of samples from
383 * the render pass because it's likely a secondary command buffer.
384 */
385 if (iview) {
386 samples = iview->image->info.samples;
387 format = iview->vk_format;
388 } else {
389 samples = cmd_buffer->state.pass->attachments[pass_att].samples;
390 format = cmd_buffer->state.pass->attachments[pass_att].format;
391 }
392
393 samples_log2 = ffs(samples) - 1;
394 fs_key = radv_format_meta_fs_key(format);
395
396 if (fs_key == -1) {
397 radv_finishme("color clears incomplete");
398 return;
399 }
400
401 if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) {
402 VkResult ret = create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key],
403 samples,
404 &device->meta_state.clear[samples_log2].render_pass[fs_key]);
405 if (ret != VK_SUCCESS) {
406 cmd_buffer->record_result = ret;
407 return;
408 }
409 }
410
411 if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) {
412 VkResult ret = create_color_pipeline(device, samples, 0,
413 &device->meta_state.clear[samples_log2].color_pipelines[fs_key],
414 device->meta_state.clear[samples_log2].render_pass[fs_key]);
415 if (ret != VK_SUCCESS) {
416 cmd_buffer->record_result = ret;
417 return;
418 }
419 }
420
421 pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
422 if (!pipeline) {
423 radv_finishme("color clears incomplete");
424 return;
425 }
426 assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear));
427 assert(pipeline);
428 assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
429 assert(clear_att->colorAttachment < subpass->color_count);
430
431 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
432 device->meta_state.clear_color_p_layout,
433 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
434 &clear_value);
435
436 struct radv_subpass clear_subpass = {
437 .color_count = 1,
438 .color_attachments = (struct radv_subpass_attachment[]) {
439 subpass->color_attachments[clear_att->colorAttachment]
440 },
441 .depth_stencil_attachment = NULL,
442 };
443
444 radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
445
446 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
447 pipeline);
448
449 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
450 .x = clear_rect->rect.offset.x,
451 .y = clear_rect->rect.offset.y,
452 .width = clear_rect->rect.extent.width,
453 .height = clear_rect->rect.extent.height,
454 .minDepth = 0.0f,
455 .maxDepth = 1.0f
456 });
457
458 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
459
460 if (view_mask) {
461 unsigned i;
462 for_each_bit(i, view_mask)
463 radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
464 } else {
465 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
466 }
467
468 radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
469 }
470
471
472 static void
473 build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs)
474 {
475 nir_builder vs_b, fs_b;
476
477 nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
478 nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
479
480 vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
481 fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
482 const struct glsl_type *position_out_type = glsl_vec4_type();
483
484 nir_variable *vs_out_pos =
485 nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type,
486 "gl_Position");
487 vs_out_pos->data.location = VARYING_SLOT_POS;
488
489 nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(vs_b.shader, nir_intrinsic_load_push_constant);
490 nir_intrinsic_set_base(in_color_load, 0);
491 nir_intrinsic_set_range(in_color_load, 4);
492 in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&vs_b, 0));
493 in_color_load->num_components = 1;
494 nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 1, 32, "depth value");
495 nir_builder_instr_insert(&vs_b, &in_color_load->instr);
496
497 nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, &in_color_load->dest.ssa);
498 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
499
500 const struct glsl_type *layer_type = glsl_int_type();
501 nir_variable *vs_out_layer =
502 nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
503 "v_layer");
504 vs_out_layer->data.location = VARYING_SLOT_LAYER;
505 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
506 nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
507 nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
508
509 nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
510 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
511
512 *out_vs = vs_b.shader;
513 *out_fs = fs_b.shader;
514 }
515
516 static VkResult
517 create_depthstencil_renderpass(struct radv_device *device,
518 uint32_t samples,
519 VkRenderPass *render_pass)
520 {
521 mtx_lock(&device->meta_state.mtx);
522 if (*render_pass) {
523 mtx_unlock(&device->meta_state.mtx);
524 return VK_SUCCESS;
525 }
526
527 VkResult result = radv_CreateRenderPass(radv_device_to_handle(device),
528 &(VkRenderPassCreateInfo) {
529 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
530 .attachmentCount = 1,
531 .pAttachments = &(VkAttachmentDescription) {
532 .format = VK_FORMAT_D32_SFLOAT_S8_UINT,
533 .samples = samples,
534 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
535 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
536 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
537 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
538 },
539 .subpassCount = 1,
540 .pSubpasses = &(VkSubpassDescription) {
541 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
542 .inputAttachmentCount = 0,
543 .colorAttachmentCount = 0,
544 .pColorAttachments = NULL,
545 .pResolveAttachments = NULL,
546 .pDepthStencilAttachment = &(VkAttachmentReference) {
547 .attachment = 0,
548 .layout = VK_IMAGE_LAYOUT_GENERAL,
549 },
550 .preserveAttachmentCount = 0,
551 .pPreserveAttachments = NULL,
552 },
553 .dependencyCount = 0,
554 }, &device->meta_state.alloc, render_pass);
555 mtx_unlock(&device->meta_state.mtx);
556 return result;
557 }
558
559 static VkResult
560 create_depthstencil_pipeline(struct radv_device *device,
561 VkImageAspectFlags aspects,
562 uint32_t samples,
563 int index,
564 VkPipeline *pipeline,
565 VkRenderPass render_pass)
566 {
567 struct nir_shader *vs_nir, *fs_nir;
568 VkResult result;
569
570 mtx_lock(&device->meta_state.mtx);
571 if (*pipeline) {
572 mtx_unlock(&device->meta_state.mtx);
573 return VK_SUCCESS;
574 }
575
576 build_depthstencil_shader(&vs_nir, &fs_nir);
577
578 const VkPipelineVertexInputStateCreateInfo vi_state = {
579 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
580 .vertexBindingDescriptionCount = 0,
581 .vertexAttributeDescriptionCount = 0,
582 };
583
584 const VkPipelineDepthStencilStateCreateInfo ds_state = {
585 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
586 .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
587 .depthCompareOp = VK_COMPARE_OP_ALWAYS,
588 .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
589 .depthBoundsTestEnable = false,
590 .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
591 .front = {
592 .passOp = VK_STENCIL_OP_REPLACE,
593 .compareOp = VK_COMPARE_OP_ALWAYS,
594 .writeMask = UINT32_MAX,
595 .reference = 0, /* dynamic */
596 },
597 .back = { 0 /* dont care */ },
598 };
599
600 const VkPipelineColorBlendStateCreateInfo cb_state = {
601 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
602 .logicOpEnable = false,
603 .attachmentCount = 0,
604 .pAttachments = NULL,
605 };
606
607 struct radv_graphics_pipeline_create_info extra = {
608 .use_rectlist = true,
609 };
610
611 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
612 extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
613 extra.db_depth_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
614 }
615 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
616 extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
617 extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
618 }
619 result = create_pipeline(device, radv_render_pass_from_handle(render_pass),
620 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
621 device->meta_state.clear_depth_p_layout,
622 &extra, &device->meta_state.alloc, pipeline);
623
624 mtx_unlock(&device->meta_state.mtx);
625 return result;
626 }
627
628 static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
629 const struct radv_image_view *iview,
630 VkImageAspectFlags aspects,
631 VkImageLayout layout,
632 bool in_render_loop,
633 const VkClearRect *clear_rect,
634 VkClearDepthStencilValue clear_value)
635 {
636 if (!iview)
637 return false;
638
639 uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
640 cmd_buffer->queue_family_index,
641 cmd_buffer->queue_family_index);
642 if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
643 clear_rect->rect.extent.width != iview->extent.width ||
644 clear_rect->rect.extent.height != iview->extent.height)
645 return false;
646 if (radv_image_is_tc_compat_htile(iview->image) &&
647 (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
648 clear_value.depth != 1.0) ||
649 ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
650 return false;
651 if (radv_image_has_htile(iview->image) &&
652 iview->base_mip == 0 &&
653 iview->base_layer == 0 &&
654 iview->layer_count == iview->image->info.array_size &&
655 radv_layout_is_htile_compressed(iview->image, layout, in_render_loop, queue_mask) &&
656 radv_image_extent_compare(iview->image, &iview->extent))
657 return true;
658 return false;
659 }
660
661 static VkPipeline
662 pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
663 struct radv_meta_state *meta_state,
664 const struct radv_image_view *iview,
665 int samples_log2,
666 VkImageAspectFlags aspects,
667 VkImageLayout layout,
668 bool in_render_loop,
669 const VkClearRect *clear_rect,
670 VkClearDepthStencilValue clear_value)
671 {
672 bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout,
673 in_render_loop, clear_rect, clear_value);
674 int index = DEPTH_CLEAR_SLOW;
675 VkPipeline *pipeline;
676
677 if (fast) {
678 /* we don't know the previous clear values, so we always have
679 * the NO_EXPCLEAR path */
680 index = DEPTH_CLEAR_FAST_NO_EXPCLEAR;
681 }
682
683 switch (aspects) {
684 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
685 pipeline = &meta_state->clear[samples_log2].depthstencil_pipeline[index];
686 break;
687 case VK_IMAGE_ASPECT_DEPTH_BIT:
688 pipeline = &meta_state->clear[samples_log2].depth_only_pipeline[index];
689 break;
690 case VK_IMAGE_ASPECT_STENCIL_BIT:
691 pipeline = &meta_state->clear[samples_log2].stencil_only_pipeline[index];
692 break;
693 default:
694 unreachable("expected depth or stencil aspect");
695 }
696
697 if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) {
698 VkResult ret = create_depthstencil_renderpass(cmd_buffer->device, 1u << samples_log2,
699 &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
700 if (ret != VK_SUCCESS) {
701 cmd_buffer->record_result = ret;
702 return VK_NULL_HANDLE;
703 }
704 }
705
706 if (*pipeline == VK_NULL_HANDLE) {
707 VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index,
708 pipeline, cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
709 if (ret != VK_SUCCESS) {
710 cmd_buffer->record_result = ret;
711 return VK_NULL_HANDLE;
712 }
713 }
714 return *pipeline;
715 }
716
717 static void
718 emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
719 const VkClearAttachment *clear_att,
720 const VkClearRect *clear_rect,
721 struct radv_subpass_attachment *ds_att,
722 uint32_t view_mask)
723 {
724 struct radv_device *device = cmd_buffer->device;
725 struct radv_meta_state *meta_state = &device->meta_state;
726 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
727 const uint32_t pass_att = ds_att->attachment;
728 VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
729 VkImageAspectFlags aspects = clear_att->aspectMask;
730 const struct radv_image_view *iview = cmd_buffer->state.attachments ?
731 cmd_buffer->state.attachments[pass_att].iview : NULL;
732 uint32_t samples, samples_log2;
733 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
734
735 /* When a framebuffer is bound to the current command buffer, get the
736 * number of samples from it. Otherwise, get the number of samples from
737 * the render pass because it's likely a secondary command buffer.
738 */
739 if (iview) {
740 samples = iview->image->info.samples;
741 } else {
742 samples = cmd_buffer->state.pass->attachments[pass_att].samples;
743 }
744
745 samples_log2 = ffs(samples) - 1;
746
747 assert(pass_att != VK_ATTACHMENT_UNUSED);
748
749 if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
750 clear_value.depth = 1.0f;
751
752 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
753 device->meta_state.clear_depth_p_layout,
754 VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
755 &clear_value.depth);
756
757 uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
758 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
759 radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
760 clear_value.stencil);
761 }
762
763 VkPipeline pipeline = pick_depthstencil_pipeline(cmd_buffer,
764 meta_state,
765 iview,
766 samples_log2,
767 aspects,
768 ds_att->layout,
769 ds_att->in_render_loop,
770 clear_rect,
771 clear_value);
772 if (!pipeline)
773 return;
774
775 struct radv_subpass clear_subpass = {
776 .color_count = 0,
777 .color_attachments = NULL,
778 .depth_stencil_attachment = ds_att,
779 };
780
781 radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
782
783 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
784 pipeline);
785
786 if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
787 ds_att->layout, ds_att->in_render_loop,
788 clear_rect, clear_value))
789 radv_update_ds_clear_metadata(cmd_buffer, iview->image,
790 clear_value, aspects);
791
792 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
793 .x = clear_rect->rect.offset.x,
794 .y = clear_rect->rect.offset.y,
795 .width = clear_rect->rect.extent.width,
796 .height = clear_rect->rect.extent.height,
797 .minDepth = 0.0f,
798 .maxDepth = 1.0f
799 });
800
801 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
802
803 if (view_mask) {
804 unsigned i;
805 for_each_bit(i, view_mask)
806 radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
807 } else {
808 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
809 }
810
811 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
812 radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
813 prev_reference);
814 }
815
816 radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
817 }
818
819 static uint32_t
820 clear_htile_mask(struct radv_cmd_buffer *cmd_buffer,
821 struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,
822 uint32_t htile_value, uint32_t htile_mask)
823 {
824 struct radv_device *device = cmd_buffer->device;
825 struct radv_meta_state *state = &device->meta_state;
826 uint64_t block_count = round_up_u64(size, 1024);
827 struct radv_meta_saved_state saved_state;
828
829 radv_meta_save(&saved_state, cmd_buffer,
830 RADV_META_SAVE_COMPUTE_PIPELINE |
831 RADV_META_SAVE_CONSTANTS |
832 RADV_META_SAVE_DESCRIPTORS);
833
834 struct radv_buffer dst_buffer = {
835 .bo = bo,
836 .offset = offset,
837 .size = size
838 };
839
840 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
841 VK_PIPELINE_BIND_POINT_COMPUTE,
842 state->clear_htile_mask_pipeline);
843
844 radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
845 state->clear_htile_mask_p_layout,
846 0, /* set */
847 1, /* descriptorWriteCount */
848 (VkWriteDescriptorSet[]) {
849 {
850 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
851 .dstBinding = 0,
852 .dstArrayElement = 0,
853 .descriptorCount = 1,
854 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
855 .pBufferInfo = &(VkDescriptorBufferInfo) {
856 .buffer = radv_buffer_to_handle(&dst_buffer),
857 .offset = 0,
858 .range = size
859 }
860 }
861 });
862
863 const unsigned constants[2] = {
864 htile_value & htile_mask,
865 ~htile_mask,
866 };
867
868 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
869 state->clear_htile_mask_p_layout,
870 VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
871 constants);
872
873 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
874
875 radv_meta_restore(&saved_state, cmd_buffer);
876
877 return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
878 RADV_CMD_FLAG_INV_VCACHE |
879 RADV_CMD_FLAG_WB_L2;
880 }
881
882 static uint32_t
883 radv_get_htile_fast_clear_value(const struct radv_image *image,
884 VkClearDepthStencilValue value)
885 {
886 uint32_t clear_value;
887
888 if (!image->planes[0].surface.has_stencil) {
889 clear_value = value.depth ? 0xfffffff0 : 0;
890 } else {
891 clear_value = value.depth ? 0xfffc0000 : 0;
892 }
893
894 return clear_value;
895 }
896
897 static uint32_t
898 radv_get_htile_mask(const struct radv_image *image, VkImageAspectFlags aspects)
899 {
900 uint32_t mask = 0;
901
902 if (!image->planes[0].surface.has_stencil) {
903 /* All the HTILE buffer is used when there is no stencil. */
904 mask = UINT32_MAX;
905 } else {
906 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
907 mask |= 0xfffffc0f;
908 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
909 mask |= 0x000003f0;
910 }
911
912 return mask;
913 }
914
915 static bool
916 radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
917 {
918 return value.depth == 1.0f || value.depth == 0.0f;
919 }
920
921 static bool
922 radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
923 {
924 return value.stencil == 0;
925 }
926
927 /**
928 * Determine if the given image can be fast cleared.
929 */
930 static bool
931 radv_image_can_fast_clear(struct radv_device *device, struct radv_image *image)
932 {
933 if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
934 return false;
935
936 if (vk_format_is_color(image->vk_format)) {
937 if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
938 return false;
939
940 /* RB+ doesn't work with CMASK fast clear on Stoney. */
941 if (!radv_image_has_dcc(image) &&
942 device->physical_device->rad_info.family == CHIP_STONEY)
943 return false;
944 } else {
945 if (!radv_image_has_htile(image))
946 return false;
947 }
948
949 /* Do not fast clears 3D images. */
950 if (image->type == VK_IMAGE_TYPE_3D)
951 return false;
952
953 return true;
954 }
955
956 /**
957 * Determine if the given image view can be fast cleared.
958 */
959 static bool
960 radv_image_view_can_fast_clear(struct radv_device *device,
961 const struct radv_image_view *iview)
962 {
963 struct radv_image *image;
964
965 if (!iview)
966 return false;
967 image = iview->image;
968
969 /* Only fast clear if the image itself can be fast cleared. */
970 if (!radv_image_can_fast_clear(device, image))
971 return false;
972
973 /* Only fast clear if all layers are bound. */
974 if (iview->base_layer > 0 ||
975 iview->layer_count != image->info.array_size)
976 return false;
977
978 /* Only fast clear if the view covers the whole image. */
979 if (!radv_image_extent_compare(image, &iview->extent))
980 return false;
981
982 return true;
983 }
984
985 static bool
986 radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
987 const struct radv_image_view *iview,
988 VkImageLayout image_layout,
989 bool in_render_loop,
990 VkImageAspectFlags aspects,
991 const VkClearRect *clear_rect,
992 const VkClearDepthStencilValue clear_value,
993 uint32_t view_mask)
994 {
995 if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview))
996 return false;
997
998 if (!radv_layout_is_htile_compressed(iview->image, image_layout, in_render_loop,
999 radv_image_queue_family_mask(iview->image,
1000 cmd_buffer->queue_family_index,
1001 cmd_buffer->queue_family_index)))
1002 return false;
1003
1004 if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
1005 clear_rect->rect.extent.width != iview->image->info.width ||
1006 clear_rect->rect.extent.height != iview->image->info.height)
1007 return false;
1008
1009 if (view_mask && (iview->image->info.array_size >= 32 ||
1010 (1u << iview->image->info.array_size) - 1u != view_mask))
1011 return false;
1012 if (!view_mask && clear_rect->baseArrayLayer != 0)
1013 return false;
1014 if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
1015 return false;
1016
1017 if (cmd_buffer->device->physical_device->rad_info.chip_class != GFX9 &&
1018 (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ||
1019 ((vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1020 !(aspects & VK_IMAGE_ASPECT_STENCIL_BIT))))
1021 return false;
1022
1023 if (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
1024 !radv_is_fast_clear_depth_allowed(clear_value)) ||
1025 ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1026 !radv_is_fast_clear_stencil_allowed(clear_value)))
1027 return false;
1028
1029 return true;
1030 }
1031
1032 static void
1033 radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
1034 const struct radv_image_view *iview,
1035 const VkClearAttachment *clear_att,
1036 enum radv_cmd_flush_bits *pre_flush,
1037 enum radv_cmd_flush_bits *post_flush)
1038 {
1039 VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1040 VkImageAspectFlags aspects = clear_att->aspectMask;
1041 uint32_t clear_word, flush_bits;
1042 uint32_t htile_mask;
1043
1044 clear_word = radv_get_htile_fast_clear_value(iview->image, clear_value);
1045 htile_mask = radv_get_htile_mask(iview->image, aspects);
1046
1047 if (pre_flush) {
1048 cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_DB |
1049 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) & ~ *pre_flush;
1050 *pre_flush |= cmd_buffer->state.flush_bits;
1051 }
1052
1053 if (htile_mask == UINT_MAX) {
1054 /* Clear the whole HTILE buffer. */
1055 flush_bits = radv_fill_buffer(cmd_buffer, iview->image->bo,
1056 iview->image->offset + iview->image->htile_offset,
1057 iview->image->planes[0].surface.htile_size, clear_word);
1058 } else {
1059 /* Only clear depth or stencil bytes in the HTILE buffer. */
1060 /* TODO: Implement that path for GFX10. */
1061 assert(cmd_buffer->device->physical_device->rad_info.chip_class == GFX9);
1062 flush_bits = clear_htile_mask(cmd_buffer, iview->image->bo,
1063 iview->image->offset + iview->image->htile_offset,
1064 iview->image->planes[0].surface.htile_size, clear_word,
1065 htile_mask);
1066 }
1067
1068 radv_update_ds_clear_metadata(cmd_buffer, iview->image, clear_value, aspects);
1069 if (post_flush) {
1070 *post_flush |= flush_bits;
1071 }
1072 }
1073
1074 static nir_shader *
1075 build_clear_htile_mask_shader()
1076 {
1077 nir_builder b;
1078
1079 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
1080 b.shader->info.name = ralloc_strdup(b.shader, "meta_clear_htile_mask");
1081 b.shader->info.cs.local_size[0] = 64;
1082 b.shader->info.cs.local_size[1] = 1;
1083 b.shader->info.cs.local_size[2] = 1;
1084
1085 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
1086 nir_ssa_def *wg_id = nir_load_work_group_id(&b);
1087 nir_ssa_def *block_size = nir_imm_ivec4(&b,
1088 b.shader->info.cs.local_size[0],
1089 b.shader->info.cs.local_size[1],
1090 b.shader->info.cs.local_size[2], 0);
1091
1092 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
1093
1094 nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
1095 offset = nir_channel(&b, offset, 0);
1096
1097 nir_intrinsic_instr *buf =
1098 nir_intrinsic_instr_create(b.shader,
1099 nir_intrinsic_vulkan_resource_index);
1100
1101 buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
1102 buf->num_components = 1;
1103 nir_intrinsic_set_desc_set(buf, 0);
1104 nir_intrinsic_set_binding(buf, 0);
1105 nir_ssa_dest_init(&buf->instr, &buf->dest, buf->num_components, 32, NULL);
1106 nir_builder_instr_insert(&b, &buf->instr);
1107
1108 nir_intrinsic_instr *constants =
1109 nir_intrinsic_instr_create(b.shader,
1110 nir_intrinsic_load_push_constant);
1111 nir_intrinsic_set_base(constants, 0);
1112 nir_intrinsic_set_range(constants, 8);
1113 constants->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
1114 constants->num_components = 2;
1115 nir_ssa_dest_init(&constants->instr, &constants->dest, 2, 32, "constants");
1116 nir_builder_instr_insert(&b, &constants->instr);
1117
1118 nir_intrinsic_instr *load =
1119 nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
1120 load->src[0] = nir_src_for_ssa(&buf->dest.ssa);
1121 load->src[1] = nir_src_for_ssa(offset);
1122 nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
1123 load->num_components = 4;
1124 nir_builder_instr_insert(&b, &load->instr);
1125
1126 /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
1127 nir_ssa_def *data =
1128 nir_iand(&b, &load->dest.ssa,
1129 nir_channel(&b, &constants->dest.ssa, 1));
1130 data = nir_ior(&b, data, nir_channel(&b, &constants->dest.ssa, 0));
1131
1132 nir_intrinsic_instr *store =
1133 nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
1134 store->src[0] = nir_src_for_ssa(data);
1135 store->src[1] = nir_src_for_ssa(&buf->dest.ssa);
1136 store->src[2] = nir_src_for_ssa(offset);
1137 nir_intrinsic_set_write_mask(store, 0xf);
1138 nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
1139 store->num_components = 4;
1140 nir_builder_instr_insert(&b, &store->instr);
1141
1142 return b.shader;
1143 }
1144
1145 static VkResult
1146 init_meta_clear_htile_mask_state(struct radv_device *device)
1147 {
1148 struct radv_meta_state *state = &device->meta_state;
1149 struct radv_shader_module cs = { .nir = NULL };
1150 VkResult result;
1151
1152 cs.nir = build_clear_htile_mask_shader();
1153
1154 VkDescriptorSetLayoutCreateInfo ds_layout_info = {
1155 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1156 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1157 .bindingCount = 1,
1158 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1159 {
1160 .binding = 0,
1161 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1162 .descriptorCount = 1,
1163 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1164 .pImmutableSamplers = NULL
1165 },
1166 }
1167 };
1168
1169 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1170 &ds_layout_info, &state->alloc,
1171 &state->clear_htile_mask_ds_layout);
1172 if (result != VK_SUCCESS)
1173 goto fail;
1174
1175 VkPipelineLayoutCreateInfo p_layout_info = {
1176 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1177 .setLayoutCount = 1,
1178 .pSetLayouts = &state->clear_htile_mask_ds_layout,
1179 .pushConstantRangeCount = 1,
1180 .pPushConstantRanges = &(VkPushConstantRange){
1181 VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
1182 },
1183 };
1184
1185 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1186 &p_layout_info, &state->alloc,
1187 &state->clear_htile_mask_p_layout);
1188 if (result != VK_SUCCESS)
1189 goto fail;
1190
1191 VkPipelineShaderStageCreateInfo shader_stage = {
1192 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1193 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1194 .module = radv_shader_module_to_handle(&cs),
1195 .pName = "main",
1196 .pSpecializationInfo = NULL,
1197 };
1198
1199 VkComputePipelineCreateInfo pipeline_info = {
1200 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1201 .stage = shader_stage,
1202 .flags = 0,
1203 .layout = state->clear_htile_mask_p_layout,
1204 };
1205
1206 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1207 radv_pipeline_cache_to_handle(&state->cache),
1208 1, &pipeline_info, NULL,
1209 &state->clear_htile_mask_pipeline);
1210
1211 ralloc_free(cs.nir);
1212 return result;
1213 fail:
1214 ralloc_free(cs.nir);
1215 return result;
1216 }
1217
1218 VkResult
1219 radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
1220 {
1221 VkResult res;
1222 struct radv_meta_state *state = &device->meta_state;
1223
1224 VkPipelineLayoutCreateInfo pl_color_create_info = {
1225 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1226 .setLayoutCount = 0,
1227 .pushConstantRangeCount = 1,
1228 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
1229 };
1230
1231 res = radv_CreatePipelineLayout(radv_device_to_handle(device),
1232 &pl_color_create_info,
1233 &device->meta_state.alloc,
1234 &device->meta_state.clear_color_p_layout);
1235 if (res != VK_SUCCESS)
1236 goto fail;
1237
1238 VkPipelineLayoutCreateInfo pl_depth_create_info = {
1239 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1240 .setLayoutCount = 0,
1241 .pushConstantRangeCount = 1,
1242 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
1243 };
1244
1245 res = radv_CreatePipelineLayout(radv_device_to_handle(device),
1246 &pl_depth_create_info,
1247 &device->meta_state.alloc,
1248 &device->meta_state.clear_depth_p_layout);
1249 if (res != VK_SUCCESS)
1250 goto fail;
1251
1252 res = init_meta_clear_htile_mask_state(device);
1253 if (res != VK_SUCCESS)
1254 goto fail;
1255
1256 if (on_demand)
1257 return VK_SUCCESS;
1258
1259 for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
1260 uint32_t samples = 1 << i;
1261 for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
1262 VkFormat format = radv_fs_key_format_exemplars[j];
1263 unsigned fs_key = radv_format_meta_fs_key(format);
1264 assert(!state->clear[i].color_pipelines[fs_key]);
1265
1266 res = create_color_renderpass(device, format, samples,
1267 &state->clear[i].render_pass[fs_key]);
1268 if (res != VK_SUCCESS)
1269 goto fail;
1270
1271 res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],
1272 state->clear[i].render_pass[fs_key]);
1273 if (res != VK_SUCCESS)
1274 goto fail;
1275
1276 }
1277
1278 res = create_depthstencil_renderpass(device,
1279 samples,
1280 &state->clear[i].depthstencil_rp);
1281 if (res != VK_SUCCESS)
1282 goto fail;
1283
1284 for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
1285 res = create_depthstencil_pipeline(device,
1286 VK_IMAGE_ASPECT_DEPTH_BIT,
1287 samples,
1288 j,
1289 &state->clear[i].depth_only_pipeline[j],
1290 state->clear[i].depthstencil_rp);
1291 if (res != VK_SUCCESS)
1292 goto fail;
1293
1294 res = create_depthstencil_pipeline(device,
1295 VK_IMAGE_ASPECT_STENCIL_BIT,
1296 samples,
1297 j,
1298 &state->clear[i].stencil_only_pipeline[j],
1299 state->clear[i].depthstencil_rp);
1300 if (res != VK_SUCCESS)
1301 goto fail;
1302
1303 res = create_depthstencil_pipeline(device,
1304 VK_IMAGE_ASPECT_DEPTH_BIT |
1305 VK_IMAGE_ASPECT_STENCIL_BIT,
1306 samples,
1307 j,
1308 &state->clear[i].depthstencil_pipeline[j],
1309 state->clear[i].depthstencil_rp);
1310 if (res != VK_SUCCESS)
1311 goto fail;
1312 }
1313 }
1314 return VK_SUCCESS;
1315
1316 fail:
1317 radv_device_finish_meta_clear_state(device);
1318 return res;
1319 }
1320
1321 static uint32_t
1322 radv_get_cmask_fast_clear_value(const struct radv_image *image)
1323 {
1324 uint32_t value = 0; /* Default value when no DCC. */
1325
1326 /* The fast-clear value is different for images that have both DCC and
1327 * CMASK metadata.
1328 */
1329 if (radv_image_has_dcc(image)) {
1330 /* DCC fast clear with MSAA should clear CMASK to 0xC. */
1331 return image->info.samples > 1 ? 0xcccccccc : 0xffffffff;
1332 }
1333
1334 return value;
1335 }
1336
1337 uint32_t
1338 radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
1339 struct radv_image *image,
1340 const VkImageSubresourceRange *range, uint32_t value)
1341 {
1342 uint64_t offset = image->offset + image->cmask_offset;
1343 uint64_t size;
1344
1345 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1346 /* TODO: clear layers. */
1347 size = image->planes[0].surface.cmask_size;
1348 } else {
1349 unsigned cmask_slice_size =
1350 image->planes[0].surface.cmask_slice_size;
1351
1352 offset += cmask_slice_size * range->baseArrayLayer;
1353 size = cmask_slice_size * radv_get_layerCount(image, range);
1354 }
1355
1356 return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
1357 }
1358
1359
1360 uint32_t
1361 radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
1362 struct radv_image *image,
1363 const VkImageSubresourceRange *range, uint32_t value)
1364 {
1365 uint64_t offset = image->offset + image->fmask_offset;
1366 uint64_t size;
1367
1368 /* MSAA images do not support mipmap levels. */
1369 assert(range->baseMipLevel == 0 &&
1370 radv_get_levelCount(image, range) == 1);
1371
1372 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1373 /* TODO: clear layers. */
1374 size = image->planes[0].surface.fmask_size;
1375 } else {
1376 unsigned fmask_slice_size =
1377 image->planes[0].surface.u.legacy.fmask.slice_size;
1378
1379
1380 offset += fmask_slice_size * range->baseArrayLayer;
1381 size = fmask_slice_size * radv_get_layerCount(image, range);
1382 }
1383
1384 return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
1385 }
1386
1387 uint32_t
1388 radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
1389 struct radv_image *image,
1390 const VkImageSubresourceRange *range, uint32_t value)
1391 {
1392 uint32_t level_count = radv_get_levelCount(image, range);
1393 uint32_t flush_bits = 0;
1394
1395 /* Mark the image as being compressed. */
1396 radv_update_dcc_metadata(cmd_buffer, image, range, true);
1397
1398 for (uint32_t l = 0; l < level_count; l++) {
1399 uint64_t offset = image->offset + image->dcc_offset;
1400 uint32_t level = range->baseMipLevel + l;
1401 uint64_t size;
1402
1403 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1404 /* Mipmap levels aren't implemented. */
1405 assert(level == 0);
1406 size = image->planes[0].surface.dcc_size;
1407 } else {
1408 const struct legacy_surf_level *surf_level =
1409 &image->planes[0].surface.u.legacy.level[level];
1410
1411 /* If dcc_fast_clear_size is 0 (which might happens for
1412 * mipmaps) the fill buffer operation below is a no-op.
1413 * This can only happen during initialization as the
1414 * fast clear path fallbacks to slow clears if one
1415 * level can't be fast cleared.
1416 */
1417 offset += surf_level->dcc_offset +
1418 surf_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
1419 size = surf_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range);
1420 }
1421
1422 flush_bits |= radv_fill_buffer(cmd_buffer, image->bo, offset,
1423 size, value);
1424 }
1425
1426 return flush_bits;
1427 }
1428
1429 uint32_t
1430 radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1431 const VkImageSubresourceRange *range, uint32_t value)
1432 {
1433 unsigned layer_count = radv_get_layerCount(image, range);
1434 uint64_t size = image->planes[0].surface.htile_slice_size * layer_count;
1435 uint64_t offset = image->offset + image->htile_offset +
1436 image->planes[0].surface.htile_slice_size * range->baseArrayLayer;
1437
1438 return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
1439 }
1440
/* DCC clear codes, used as the 32-bit DCC reset value by
 * vi_get_fast_clear_parameters().
 */
enum {
	/* Default reset value. */
	RADV_DCC_CLEAR_REG         = 0x20202020U,
	/* OR'ed in when the main channels are set. */
	RADV_DCC_CLEAR_MAIN_1      = 0x80808080U,
	/* OR'ed in when the extra channel is set. */
	RADV_DCC_CLEAR_SECONDARY_1 = 0x40404040U,
};
1446
1447 static void vi_get_fast_clear_parameters(VkFormat format,
1448 const VkClearColorValue *clear_value,
1449 uint32_t* reset_value,
1450 bool *can_avoid_fast_clear_elim)
1451 {
1452 bool values[4] = {};
1453 int extra_channel;
1454 bool main_value = false;
1455 bool extra_value = false;
1456 int i;
1457 *can_avoid_fast_clear_elim = false;
1458
1459 *reset_value = RADV_DCC_CLEAR_REG;
1460
1461 const struct vk_format_description *desc = vk_format_description(format);
1462 if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
1463 format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
1464 format == VK_FORMAT_B5G6R5_UNORM_PACK16)
1465 extra_channel = -1;
1466 else if (desc->layout == VK_FORMAT_LAYOUT_PLAIN) {
1467 if (radv_translate_colorswap(format, false) <= 1)
1468 extra_channel = desc->nr_channels - 1;
1469 else
1470 extra_channel = 0;
1471 } else
1472 return;
1473
1474 for (i = 0; i < 4; i++) {
1475 int index = desc->swizzle[i] - VK_SWIZZLE_X;
1476 if (desc->swizzle[i] < VK_SWIZZLE_X ||
1477 desc->swizzle[i] > VK_SWIZZLE_W)
1478 continue;
1479
1480 if (desc->channel[i].pure_integer &&
1481 desc->channel[i].type == VK_FORMAT_TYPE_SIGNED) {
1482 /* Use the maximum value for clamping the clear color. */
1483 int max = u_bit_consecutive(0, desc->channel[i].size - 1);
1484
1485 values[i] = clear_value->int32[i] != 0;
1486 if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
1487 return;
1488 } else if (desc->channel[i].pure_integer &&
1489 desc->channel[i].type == VK_FORMAT_TYPE_UNSIGNED) {
1490 /* Use the maximum value for clamping the clear color. */
1491 unsigned max = u_bit_consecutive(0, desc->channel[i].size);
1492
1493 values[i] = clear_value->uint32[i] != 0U;
1494 if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
1495 return;
1496 } else {
1497 values[i] = clear_value->float32[i] != 0.0F;
1498 if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
1499 return;
1500 }
1501
1502 if (index == extra_channel)
1503 extra_value = values[i];
1504 else
1505 main_value = values[i];
1506 }
1507
1508 for (int i = 0; i < 4; ++i)
1509 if (values[i] != main_value &&
1510 desc->swizzle[i] - VK_SWIZZLE_X != extra_channel &&
1511 desc->swizzle[i] >= VK_SWIZZLE_X &&
1512 desc->swizzle[i] <= VK_SWIZZLE_W)
1513 return;
1514
1515 *can_avoid_fast_clear_elim = true;
1516 *reset_value = 0;
1517 if (main_value)
1518 *reset_value |= RADV_DCC_CLEAR_MAIN_1;
1519
1520 if (extra_value)
1521 *reset_value |= RADV_DCC_CLEAR_SECONDARY_1;
1522 return;
1523 }
1524
1525 static bool
1526 radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
1527 const struct radv_image_view *iview,
1528 VkImageLayout image_layout,
1529 bool in_render_loop,
1530 const VkClearRect *clear_rect,
1531 VkClearColorValue clear_value,
1532 uint32_t view_mask)
1533 {
1534 uint32_t clear_color[2];
1535
1536 if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview))
1537 return false;
1538
1539 if (!radv_layout_can_fast_clear(iview->image, image_layout, in_render_loop,
1540 radv_image_queue_family_mask(iview->image,
1541 cmd_buffer->queue_family_index,
1542 cmd_buffer->queue_family_index)))
1543 return false;
1544
1545 if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
1546 clear_rect->rect.extent.width != iview->image->info.width ||
1547 clear_rect->rect.extent.height != iview->image->info.height)
1548 return false;
1549
1550 if (view_mask && (iview->image->info.array_size >= 32 ||
1551 (1u << iview->image->info.array_size) - 1u != view_mask))
1552 return false;
1553 if (!view_mask && clear_rect->baseArrayLayer != 0)
1554 return false;
1555 if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
1556 return false;
1557
1558 /* DCC */
1559 if (!radv_format_pack_clear_color(iview->vk_format,
1560 clear_color, &clear_value))
1561 return false;
1562
1563 if (radv_dcc_enabled(iview->image, iview->base_mip)) {
1564 bool can_avoid_fast_clear_elim;
1565 uint32_t reset_value;
1566
1567 vi_get_fast_clear_parameters(iview->vk_format,
1568 &clear_value, &reset_value,
1569 &can_avoid_fast_clear_elim);
1570
1571 if (iview->image->info.samples > 1) {
1572 /* DCC fast clear with MSAA should clear CMASK. */
1573 /* FIXME: This doesn't work for now. There is a
1574 * hardware bug with fast clears and DCC for MSAA
1575 * textures. AMDVLK has a workaround but it doesn't
1576 * seem to work here. Note that we might emit useless
1577 * CB flushes but that shouldn't matter.
1578 */
1579 if (!can_avoid_fast_clear_elim)
1580 return false;
1581 }
1582
1583 if (iview->image->info.levels > 1 &&
1584 cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
1585 for (uint32_t l = 0; l < iview->level_count; l++) {
1586 uint32_t level = iview->base_mip + l;
1587 struct legacy_surf_level *surf_level =
1588 &iview->image->planes[0].surface.u.legacy.level[level];
1589
1590 /* Do not fast clears if one level can't be
1591 * fast cleared.
1592 */
1593 if (!surf_level->dcc_fast_clear_size)
1594 return false;
1595 }
1596 }
1597 }
1598
1599 return true;
1600 }
1601
1602
1603 static void
1604 radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
1605 const struct radv_image_view *iview,
1606 const VkClearAttachment *clear_att,
1607 uint32_t subpass_att,
1608 enum radv_cmd_flush_bits *pre_flush,
1609 enum radv_cmd_flush_bits *post_flush)
1610 {
1611 VkClearColorValue clear_value = clear_att->clearValue.color;
1612 uint32_t clear_color[2], flush_bits = 0;
1613 uint32_t cmask_clear_value;
1614 VkImageSubresourceRange range = {
1615 .aspectMask = iview->aspect_mask,
1616 .baseMipLevel = iview->base_mip,
1617 .levelCount = iview->level_count,
1618 .baseArrayLayer = iview->base_layer,
1619 .layerCount = iview->layer_count,
1620 };
1621
1622 if (pre_flush) {
1623 cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
1624 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) & ~ *pre_flush;
1625 *pre_flush |= cmd_buffer->state.flush_bits;
1626 }
1627
1628 /* DCC */
1629 radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value);
1630
1631 cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);
1632
1633 /* clear cmask buffer */
1634 if (radv_dcc_enabled(iview->image, iview->base_mip)) {
1635 uint32_t reset_value;
1636 bool can_avoid_fast_clear_elim;
1637 bool need_decompress_pass = false;
1638
1639 vi_get_fast_clear_parameters(iview->vk_format,
1640 &clear_value, &reset_value,
1641 &can_avoid_fast_clear_elim);
1642
1643 if (radv_image_has_cmask(iview->image)) {
1644 flush_bits = radv_clear_cmask(cmd_buffer, iview->image,
1645 &range, cmask_clear_value);
1646
1647 need_decompress_pass = true;
1648 }
1649
1650 if (!can_avoid_fast_clear_elim)
1651 need_decompress_pass = true;
1652
1653 flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range,
1654 reset_value);
1655
1656 radv_update_fce_metadata(cmd_buffer, iview->image, &range,
1657 need_decompress_pass);
1658 } else {
1659 flush_bits = radv_clear_cmask(cmd_buffer, iview->image,
1660 &range, cmask_clear_value);
1661 }
1662
1663 if (post_flush) {
1664 *post_flush |= flush_bits;
1665 }
1666
1667 radv_update_color_clear_metadata(cmd_buffer, iview, subpass_att,
1668 clear_color);
1669 }
1670
1671 /**
1672 * The parameters mean that same as those in vkCmdClearAttachments.
1673 */
1674 static void
1675 emit_clear(struct radv_cmd_buffer *cmd_buffer,
1676 const VkClearAttachment *clear_att,
1677 const VkClearRect *clear_rect,
1678 enum radv_cmd_flush_bits *pre_flush,
1679 enum radv_cmd_flush_bits *post_flush,
1680 uint32_t view_mask,
1681 bool ds_resolve_clear)
1682 {
1683 const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
1684 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
1685 VkImageAspectFlags aspects = clear_att->aspectMask;
1686
1687 if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1688 const uint32_t subpass_att = clear_att->colorAttachment;
1689 assert(subpass_att < subpass->color_count);
1690 const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
1691 if (pass_att == VK_ATTACHMENT_UNUSED)
1692 return;
1693
1694 VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
1695 bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop;
1696 const struct radv_image_view *iview = fb ? cmd_buffer->state.attachments[pass_att].iview : NULL;
1697 VkClearColorValue clear_value = clear_att->clearValue.color;
1698
1699 if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop,
1700 clear_rect, clear_value, view_mask)) {
1701 radv_fast_clear_color(cmd_buffer, iview, clear_att,
1702 subpass_att, pre_flush,
1703 post_flush);
1704 } else {
1705 emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
1706 }
1707 } else {
1708 struct radv_subpass_attachment *ds_att = subpass->depth_stencil_attachment;
1709
1710 if (ds_resolve_clear)
1711 ds_att = subpass->ds_resolve_attachment;
1712
1713 if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED)
1714 return;
1715
1716 VkImageLayout image_layout = ds_att->layout;
1717 bool in_render_loop = ds_att->in_render_loop;
1718 const struct radv_image_view *iview = fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL;
1719 VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1720
1721 assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
1722 VK_IMAGE_ASPECT_STENCIL_BIT));
1723
1724 if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout,
1725 in_render_loop, aspects, clear_rect,
1726 clear_value, view_mask)) {
1727 radv_fast_clear_depth(cmd_buffer, iview, clear_att,
1728 pre_flush, post_flush);
1729 } else {
1730 emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect,
1731 ds_att, view_mask);
1732 }
1733 }
1734 }
1735
1736 static inline bool
1737 radv_attachment_needs_clear(struct radv_cmd_state *cmd_state, uint32_t a)
1738 {
1739 uint32_t view_mask = cmd_state->subpass->view_mask;
1740 return (a != VK_ATTACHMENT_UNUSED &&
1741 cmd_state->attachments[a].pending_clear_aspects &&
1742 (!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));
1743 }
1744
1745 static bool
1746 radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer)
1747 {
1748 struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1749 uint32_t a;
1750
1751 if (!cmd_state->subpass)
1752 return false;
1753
1754 for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
1755 a = cmd_state->subpass->color_attachments[i].attachment;
1756 if (radv_attachment_needs_clear(cmd_state, a))
1757 return true;
1758 }
1759
1760 if (cmd_state->subpass->depth_stencil_attachment) {
1761 a = cmd_state->subpass->depth_stencil_attachment->attachment;
1762 if (radv_attachment_needs_clear(cmd_state, a))
1763 return true;
1764 }
1765
1766 if (!cmd_state->subpass->ds_resolve_attachment)
1767 return false;
1768
1769 a = cmd_state->subpass->ds_resolve_attachment->attachment;
1770 return radv_attachment_needs_clear(cmd_state, a);
1771 }
1772
1773 static void
1774 radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
1775 struct radv_attachment_state *attachment,
1776 const VkClearAttachment *clear_att,
1777 enum radv_cmd_flush_bits *pre_flush,
1778 enum radv_cmd_flush_bits *post_flush,
1779 bool ds_resolve_clear)
1780 {
1781 struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1782 uint32_t view_mask = cmd_state->subpass->view_mask;
1783
1784 VkClearRect clear_rect = {
1785 .rect = cmd_state->render_area,
1786 .baseArrayLayer = 0,
1787 .layerCount = cmd_state->framebuffer->layers,
1788 };
1789
1790 emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
1791 view_mask & ~attachment->cleared_views, ds_resolve_clear);
1792 if (view_mask)
1793 attachment->cleared_views |= view_mask;
1794 else
1795 attachment->pending_clear_aspects = 0;
1796 }
1797
1798 /**
1799 * Emit any pending attachment clears for the current subpass.
1800 *
1801 * @see radv_attachment_state::pending_clear_aspects
1802 */
1803 void
1804 radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
1805 {
1806 struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1807 struct radv_meta_saved_state saved_state;
1808 enum radv_cmd_flush_bits pre_flush = 0;
1809 enum radv_cmd_flush_bits post_flush = 0;
1810
1811 if (!radv_subpass_needs_clear(cmd_buffer))
1812 return;
1813
1814 radv_meta_save(&saved_state, cmd_buffer,
1815 RADV_META_SAVE_GRAPHICS_PIPELINE |
1816 RADV_META_SAVE_CONSTANTS);
1817
1818 for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
1819 uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
1820
1821 if (!radv_attachment_needs_clear(cmd_state, a))
1822 continue;
1823
1824 assert(cmd_state->attachments[a].pending_clear_aspects ==
1825 VK_IMAGE_ASPECT_COLOR_BIT);
1826
1827 VkClearAttachment clear_att = {
1828 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1829 .colorAttachment = i, /* Use attachment index relative to subpass */
1830 .clearValue = cmd_state->attachments[a].clear_value,
1831 };
1832
1833 radv_subpass_clear_attachment(cmd_buffer,
1834 &cmd_state->attachments[a],
1835 &clear_att, &pre_flush,
1836 &post_flush, false);
1837 }
1838
1839 if (cmd_state->subpass->depth_stencil_attachment) {
1840 uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;
1841 if (radv_attachment_needs_clear(cmd_state, ds)) {
1842 VkClearAttachment clear_att = {
1843 .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
1844 .clearValue = cmd_state->attachments[ds].clear_value,
1845 };
1846
1847 radv_subpass_clear_attachment(cmd_buffer,
1848 &cmd_state->attachments[ds],
1849 &clear_att, &pre_flush,
1850 &post_flush, false);
1851 }
1852 }
1853
1854 if (cmd_state->subpass->ds_resolve_attachment) {
1855 uint32_t ds_resolve = cmd_state->subpass->ds_resolve_attachment->attachment;
1856 if (radv_attachment_needs_clear(cmd_state, ds_resolve)) {
1857 VkClearAttachment clear_att = {
1858 .aspectMask = cmd_state->attachments[ds_resolve].pending_clear_aspects,
1859 .clearValue = cmd_state->attachments[ds_resolve].clear_value,
1860 };
1861
1862 radv_subpass_clear_attachment(cmd_buffer,
1863 &cmd_state->attachments[ds_resolve],
1864 &clear_att, &pre_flush,
1865 &post_flush, true);
1866 }
1867 }
1868
1869 radv_meta_restore(&saved_state, cmd_buffer);
1870 cmd_buffer->state.flush_bits |= post_flush;
1871 }
1872
1873 static void
1874 radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
1875 struct radv_image *image,
1876 VkImageLayout image_layout,
1877 const VkImageSubresourceRange *range,
1878 VkFormat format, int level, int layer,
1879 const VkClearValue *clear_val)
1880 {
1881 VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
1882 struct radv_image_view iview;
1883 uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
1884 uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
1885
1886 radv_image_view_init(&iview, cmd_buffer->device,
1887 &(VkImageViewCreateInfo) {
1888 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1889 .image = radv_image_to_handle(image),
1890 .viewType = radv_meta_get_view_type(image),
1891 .format = format,
1892 .subresourceRange = {
1893 .aspectMask = range->aspectMask,
1894 .baseMipLevel = range->baseMipLevel + level,
1895 .levelCount = 1,
1896 .baseArrayLayer = range->baseArrayLayer + layer,
1897 .layerCount = 1
1898 },
1899 }, NULL);
1900
1901 VkFramebuffer fb;
1902 radv_CreateFramebuffer(device_h,
1903 &(VkFramebufferCreateInfo) {
1904 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
1905 .attachmentCount = 1,
1906 .pAttachments = (VkImageView[]) {
1907 radv_image_view_to_handle(&iview),
1908 },
1909 .width = width,
1910 .height = height,
1911 .layers = 1
1912 },
1913 &cmd_buffer->pool->alloc,
1914 &fb);
1915
1916 VkAttachmentDescription att_desc = {
1917 .format = iview.vk_format,
1918 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
1919 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
1920 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
1921 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
1922 .initialLayout = image_layout,
1923 .finalLayout = image_layout,
1924 };
1925
1926 VkSubpassDescription subpass_desc = {
1927 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
1928 .inputAttachmentCount = 0,
1929 .colorAttachmentCount = 0,
1930 .pColorAttachments = NULL,
1931 .pResolveAttachments = NULL,
1932 .pDepthStencilAttachment = NULL,
1933 .preserveAttachmentCount = 0,
1934 .pPreserveAttachments = NULL,
1935 };
1936
1937 const VkAttachmentReference att_ref = {
1938 .attachment = 0,
1939 .layout = image_layout,
1940 };
1941
1942 if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1943 subpass_desc.colorAttachmentCount = 1;
1944 subpass_desc.pColorAttachments = &att_ref;
1945 } else {
1946 subpass_desc.pDepthStencilAttachment = &att_ref;
1947 }
1948
1949 VkRenderPass pass;
1950 radv_CreateRenderPass(device_h,
1951 &(VkRenderPassCreateInfo) {
1952 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
1953 .attachmentCount = 1,
1954 .pAttachments = &att_desc,
1955 .subpassCount = 1,
1956 .pSubpasses = &subpass_desc,
1957 },
1958 &cmd_buffer->pool->alloc,
1959 &pass);
1960
1961 radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
1962 &(VkRenderPassBeginInfo) {
1963 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
1964 .renderArea = {
1965 .offset = { 0, 0, },
1966 .extent = {
1967 .width = width,
1968 .height = height,
1969 },
1970 },
1971 .renderPass = pass,
1972 .framebuffer = fb,
1973 .clearValueCount = 0,
1974 .pClearValues = NULL,
1975 },
1976 VK_SUBPASS_CONTENTS_INLINE);
1977
1978 VkClearAttachment clear_att = {
1979 .aspectMask = range->aspectMask,
1980 .colorAttachment = 0,
1981 .clearValue = *clear_val,
1982 };
1983
1984 VkClearRect clear_rect = {
1985 .rect = {
1986 .offset = { 0, 0 },
1987 .extent = { width, height },
1988 },
1989 .baseArrayLayer = range->baseArrayLayer,
1990 .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
1991 };
1992
1993 emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);
1994
1995 radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
1996 radv_DestroyRenderPass(device_h, pass,
1997 &cmd_buffer->pool->alloc);
1998 radv_DestroyFramebuffer(device_h, fb,
1999 &cmd_buffer->pool->alloc);
2000 }
2001
2002 /**
2003 * Return TRUE if a fast color or depth clear has been performed.
2004 */
2005 static bool
2006 radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer,
2007 struct radv_image *image,
2008 VkFormat format,
2009 VkImageLayout image_layout,
2010 bool in_render_loop,
2011 const VkImageSubresourceRange *range,
2012 const VkClearValue *clear_val)
2013 {
2014 struct radv_image_view iview;
2015
2016 radv_image_view_init(&iview, cmd_buffer->device,
2017 &(VkImageViewCreateInfo) {
2018 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2019 .image = radv_image_to_handle(image),
2020 .viewType = radv_meta_get_view_type(image),
2021 .format = image->vk_format,
2022 .subresourceRange = {
2023 .aspectMask = range->aspectMask,
2024 .baseMipLevel = range->baseMipLevel,
2025 .levelCount = range->levelCount,
2026 .baseArrayLayer = range->baseArrayLayer,
2027 .layerCount = range->layerCount,
2028 },
2029 }, NULL);
2030
2031 VkClearRect clear_rect = {
2032 .rect = {
2033 .offset = { 0, 0 },
2034 .extent = {
2035 radv_minify(image->info.width, range->baseMipLevel),
2036 radv_minify(image->info.height, range->baseMipLevel),
2037 },
2038 },
2039 .baseArrayLayer = range->baseArrayLayer,
2040 .layerCount = range->layerCount,
2041 };
2042
2043 VkClearAttachment clear_att = {
2044 .aspectMask = range->aspectMask,
2045 .colorAttachment = 0,
2046 .clearValue = *clear_val,
2047 };
2048
2049 if (vk_format_is_color(format)) {
2050 if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout,
2051 in_render_loop, &clear_rect,
2052 clear_att.clearValue.color, 0)) {
2053 radv_fast_clear_color(cmd_buffer, &iview, &clear_att,
2054 clear_att.colorAttachment,
2055 NULL, NULL);
2056 return true;
2057 }
2058 } else {
2059 if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout,
2060 in_render_loop,range->aspectMask,
2061 &clear_rect, clear_att.clearValue.depthStencil,
2062 0)) {
2063 radv_fast_clear_depth(cmd_buffer, &iview, &clear_att,
2064 NULL, NULL);
2065 return true;
2066 }
2067 }
2068
2069 return false;
2070 }
2071
2072 static void
2073 radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
2074 struct radv_image *image,
2075 VkImageLayout image_layout,
2076 const VkClearValue *clear_value,
2077 uint32_t range_count,
2078 const VkImageSubresourceRange *ranges,
2079 bool cs)
2080 {
2081 VkFormat format = image->vk_format;
2082 VkClearValue internal_clear_value = *clear_value;
2083
2084 if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
2085 uint32_t value;
2086 format = VK_FORMAT_R32_UINT;
2087 value = float3_to_rgb9e5(clear_value->color.float32);
2088 internal_clear_value.color.uint32[0] = value;
2089 }
2090
2091 if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
2092 uint8_t r, g;
2093 format = VK_FORMAT_R8_UINT;
2094 r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
2095 g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
2096 internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
2097 }
2098
2099 if (format == VK_FORMAT_R32G32B32_UINT ||
2100 format == VK_FORMAT_R32G32B32_SINT ||
2101 format == VK_FORMAT_R32G32B32_SFLOAT)
2102 cs = true;
2103
2104 for (uint32_t r = 0; r < range_count; r++) {
2105 const VkImageSubresourceRange *range = &ranges[r];
2106
2107 /* Try to perform a fast clear first, otherwise fallback to
2108 * the legacy path.
2109 */
2110 if (!cs &&
2111 radv_fast_clear_range(cmd_buffer, image, format,
2112 image_layout, false, range,
2113 &internal_clear_value)) {
2114 continue;
2115 }
2116
2117 for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
2118 const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
2119 radv_minify(image->info.depth, range->baseMipLevel + l) :
2120 radv_get_layerCount(image, range);
2121 for (uint32_t s = 0; s < layer_count; ++s) {
2122
2123 if (cs) {
2124 struct radv_meta_blit2d_surf surf;
2125 surf.format = format;
2126 surf.image = image;
2127 surf.level = range->baseMipLevel + l;
2128 surf.layer = range->baseArrayLayer + s;
2129 surf.aspect_mask = range->aspectMask;
2130 radv_meta_clear_image_cs(cmd_buffer, &surf,
2131 &internal_clear_value.color);
2132 } else {
2133 radv_clear_image_layer(cmd_buffer, image, image_layout,
2134 range, format, l, s, &internal_clear_value);
2135 }
2136 }
2137 }
2138 }
2139 }
2140
2141 void radv_CmdClearColorImage(
2142 VkCommandBuffer commandBuffer,
2143 VkImage image_h,
2144 VkImageLayout imageLayout,
2145 const VkClearColorValue* pColor,
2146 uint32_t rangeCount,
2147 const VkImageSubresourceRange* pRanges)
2148 {
2149 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2150 RADV_FROM_HANDLE(radv_image, image, image_h);
2151 struct radv_meta_saved_state saved_state;
2152 bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
2153
2154 if (cs) {
2155 radv_meta_save(&saved_state, cmd_buffer,
2156 RADV_META_SAVE_COMPUTE_PIPELINE |
2157 RADV_META_SAVE_CONSTANTS |
2158 RADV_META_SAVE_DESCRIPTORS);
2159 } else {
2160 radv_meta_save(&saved_state, cmd_buffer,
2161 RADV_META_SAVE_GRAPHICS_PIPELINE |
2162 RADV_META_SAVE_CONSTANTS);
2163 }
2164
2165 radv_cmd_clear_image(cmd_buffer, image, imageLayout,
2166 (const VkClearValue *) pColor,
2167 rangeCount, pRanges, cs);
2168
2169 radv_meta_restore(&saved_state, cmd_buffer);
2170 }
2171
2172 void radv_CmdClearDepthStencilImage(
2173 VkCommandBuffer commandBuffer,
2174 VkImage image_h,
2175 VkImageLayout imageLayout,
2176 const VkClearDepthStencilValue* pDepthStencil,
2177 uint32_t rangeCount,
2178 const VkImageSubresourceRange* pRanges)
2179 {
2180 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2181 RADV_FROM_HANDLE(radv_image, image, image_h);
2182 struct radv_meta_saved_state saved_state;
2183
2184 radv_meta_save(&saved_state, cmd_buffer,
2185 RADV_META_SAVE_GRAPHICS_PIPELINE |
2186 RADV_META_SAVE_CONSTANTS);
2187
2188 radv_cmd_clear_image(cmd_buffer, image, imageLayout,
2189 (const VkClearValue *) pDepthStencil,
2190 rangeCount, pRanges, false);
2191
2192 radv_meta_restore(&saved_state, cmd_buffer);
2193 }
2194
2195 void radv_CmdClearAttachments(
2196 VkCommandBuffer commandBuffer,
2197 uint32_t attachmentCount,
2198 const VkClearAttachment* pAttachments,
2199 uint32_t rectCount,
2200 const VkClearRect* pRects)
2201 {
2202 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2203 struct radv_meta_saved_state saved_state;
2204 enum radv_cmd_flush_bits pre_flush = 0;
2205 enum radv_cmd_flush_bits post_flush = 0;
2206
2207 if (!cmd_buffer->state.subpass)
2208 return;
2209
2210 radv_meta_save(&saved_state, cmd_buffer,
2211 RADV_META_SAVE_GRAPHICS_PIPELINE |
2212 RADV_META_SAVE_CONSTANTS);
2213
2214 /* FINISHME: We can do better than this dumb loop. It thrashes too much
2215 * state.
2216 */
2217 for (uint32_t a = 0; a < attachmentCount; ++a) {
2218 for (uint32_t r = 0; r < rectCount; ++r) {
2219 emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
2220 cmd_buffer->state.subpass->view_mask, false);
2221 }
2222 }
2223
2224 radv_meta_restore(&saved_state, cmd_buffer);
2225 cmd_buffer->state.flush_bits |= post_flush;
2226 }