radv: implement all depth/stencil resolve modes using compute
[mesa.git] src/amd/vulkan/radv_meta_resolve_cs.c
/*
 * Copyright © 2016 Dave Airlie
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */


#include <assert.h>
#include <stdbool.h>

#include "radv_meta.h"
#include "radv_private.h"
#include "nir/nir_builder.h"
#include "sid.h"
#include "vk_format.h"

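/* Applies the linear -> sRGB transfer function to the RGB channels and
 * passes alpha through unchanged:
 *
 *   srgb = 12.92 * linear                        if linear < 0.0031308
 *   srgb = 1.055 * pow(linear, 1 / 2.4) - 0.055  otherwise
 */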
static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
							    nir_ssa_def *input)
{
	unsigned i;

	nir_ssa_def *cmp[3];
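	/* 0x3b4d2e1c is the IEEE-754 bit pattern of 0.0031308f, the cutoff
	 * between the linear segment and the power curve of the sRGB
	 * encoding.
	 */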
	for (i = 0; i < 3; i++)
		cmp[i] = nir_flt(b, nir_channel(b, input, i),
				 nir_imm_int(b, 0x3b4d2e1c));

	nir_ssa_def *ltvals[3];
	for (i = 0; i < 3; i++)
		ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
				     nir_imm_float(b, 12.92));

	nir_ssa_def *gtvals[3];

	for (i = 0; i < 3; i++) {
		gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
				     nir_imm_float(b, 1.0/2.4));
		gtvals[i] = nir_fmul(b, gtvals[i],
				     nir_imm_float(b, 1.055));
		gtvals[i] = nir_fsub(b, gtvals[i],
				     nir_imm_float(b, 0.055));
	}

	nir_ssa_def *comp[4];
	for (i = 0; i < 3; i++)
		comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
	comp[3] = nir_channels(b, input, 1 << 3);
	return nir_vec(b, comp, 4);
}

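/* Builds a color resolve compute shader: each invocation fetches the samples
 * of one source texel and resolves them via the shared helper
 * radv_meta_build_resolve_shader_core, optionally applies the sRGB encoding,
 * and stores the result to the destination storage image. One variant is
 * built per sample count and per int/srgb/float flavor.
 */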
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
	nir_builder b;
	char name[64];
	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : (is_srgb ? "srgb" : "float"));
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, name);
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

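	/* Push-constant layout shared by all resolve shaders in this file,
	 * 16 bytes total, matching the VkPushConstantRange in create_layout()
	 * (sketch for illustration only; no such struct exists in the code):
	 *
	 *   struct resolve_push_consts {
	 *           int32_t src_offset[2];   // bytes 0..7
	 *           int32_t dst_offset[2];   // bytes 8..15
	 *   };
	 */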
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 16);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 16);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	dst_offset->num_components = 2;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
	nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

	radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
					    color, img_coord);

	nir_ssa_def *outval = nir_load_var(&b, color);
	if (is_srgb)
		outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);
	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}

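/* Indices into the per-sample-count depth/stencil pipeline tables; also used
 * to pick float vs. integer ops when building the shaders below.
 */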
enum {
	DEPTH_RESOLVE,
	STENCIL_RESOLVE,
};

static const char *
get_resolve_mode_str(VkResolveModeFlagBitsKHR resolve_mode)
{
	switch (resolve_mode) {
	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
		return "zero";
	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
		return "average";
	case VK_RESOLVE_MODE_MIN_BIT_KHR:
		return "min";
	case VK_RESOLVE_MODE_MAX_BIT_KHR:
		return "max";
	default:
		unreachable("invalid resolve mode");
	}
}

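/* Builds a depth or stencil resolve compute shader for one of the
 * VK_KHR_depth_stencil_resolve modes. Sample 0 is always fetched; for the
 * min/max/average modes the remaining samples are fetched and combined in a
 * loop. SAMPLE_ZERO variants are built with samples == 0 since the loop is
 * never entered.
 */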
static nir_shader *
build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
					   int index,
					   VkResolveModeFlagBitsKHR resolve_mode)
{
	nir_builder b;
	char name[64];
	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	snprintf(name, 64, "meta_resolve_cs_%s-%s-%d",
		 index == DEPTH_RESOLVE ? "depth" : "stencil",
		 get_resolve_mode_str(resolve_mode), samples);

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, name);
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 16);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 16);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	dst_offset->num_components = 2;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);

	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

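	/* Depth values are fetched and combined as floats, stencil values as
	 * unsigned integers.
	 */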
	nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float : nir_type_uint;

	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
	tex->op = nir_texop_txf_ms;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(img_coord);
	tex->src[1].src_type = nir_tex_src_ms_index;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = type;
	tex->is_array = false;
	tex->coord_components = 2;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	nir_ssa_def *outval = &tex->dest.ssa;

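	/* SAMPLE_ZERO is done at this point: the value of sample 0 is the
	 * result. For the other modes, fold in the remaining samples with the
	 * matching combining op; AVERAGE accumulates a sum and divides at the
	 * end.
	 */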
	if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
		for (int i = 1; i < samples; i++) {
			nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
			tex_add->op = nir_texop_txf_ms;
			tex_add->src[0].src_type = nir_tex_src_coord;
			tex_add->src[0].src = nir_src_for_ssa(img_coord);
			tex_add->src[1].src_type = nir_tex_src_ms_index;
			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
			tex_add->src[2].src_type = nir_tex_src_texture_deref;
			tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
			tex_add->dest_type = type;
			tex_add->is_array = false;
			tex_add->coord_components = 2;

			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
			nir_builder_instr_insert(&b, &tex_add->instr);

			switch (resolve_mode) {
			case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
				assert(index == DEPTH_RESOLVE);
				outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
				break;
			case VK_RESOLVE_MODE_MIN_BIT_KHR:
				if (index == DEPTH_RESOLVE)
					outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
				else
					outval = nir_umin(&b, outval, &tex_add->dest.ssa);
				break;
			case VK_RESOLVE_MODE_MAX_BIT_KHR:
				if (index == DEPTH_RESOLVE)
					outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
				else
					outval = nir_umax(&b, outval, &tex_add->dest.ssa);
				break;
			default:
				unreachable("invalid resolve mode");
			}
		}

		if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
			outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
	}

	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);
	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}

static VkResult
create_layout(struct radv_device *device)
{
	VkResult result;
	/*
	 * Two descriptors: one for the image being sampled and
	 * one for the image being written.
	 */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.resolve_compute.ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.resolve_compute.p_layout);
	if (result != VK_SUCCESS)
		goto fail;
	return VK_SUCCESS;
fail:
	return result;
}

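/* Pipelines are created lazily and cached in the meta state; the mutex
 * guards against two command buffers racing to build the same variant.
 */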
static VkResult
create_resolve_pipeline(struct radv_device *device,
			int samples,
			bool is_integer,
			bool is_srgb,
			VkPipeline *pipeline)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };

	mtx_lock(&device->meta_state.mtx);
	if (*pipeline) {
		mtx_unlock(&device->meta_state.mtx);
		return VK_SUCCESS;
	}

	cs.nir = build_resolve_compute_shader(device, is_integer, is_srgb, samples);

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.resolve_compute.p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	ralloc_free(cs.nir);
	mtx_unlock(&device->meta_state.mtx);
	return VK_SUCCESS;
fail:
	ralloc_free(cs.nir);
	mtx_unlock(&device->meta_state.mtx);
	return result;
}

static VkResult
create_depth_stencil_resolve_pipeline(struct radv_device *device,
				      int samples,
				      int index,
				      VkResolveModeFlagBitsKHR resolve_mode,
				      VkPipeline *pipeline)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };

	mtx_lock(&device->meta_state.mtx);
	if (*pipeline) {
		mtx_unlock(&device->meta_state.mtx);
		return VK_SUCCESS;
	}

	cs.nir = build_depth_stencil_resolve_compute_shader(device, samples,
							    index, resolve_mode);

	/* compute shader */
	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.resolve_compute.p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	ralloc_free(cs.nir);
	mtx_unlock(&device->meta_state.mtx);
	return VK_SUCCESS;
fail:
	ralloc_free(cs.nir);
	mtx_unlock(&device->meta_state.mtx);
	return result;
}

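/* Creates the shared layouts. Unless on_demand is set, every pipeline
 * variant is also prebuilt: one color pipeline per sample count and
 * int/srgb/float flavor, plus the depth/stencil min/max/average variants.
 * The SAMPLE_ZERO pipelines only ever read sample 0, so a single variant
 * built with samples == 0 covers all sample counts.
 */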
VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
	struct radv_meta_state *state = &device->meta_state;
	VkResult res;

	res = create_layout(device);
	if (res != VK_SUCCESS)
		goto fail;

	if (on_demand)
		return VK_SUCCESS;

	for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
		uint32_t samples = 1 << i;

		res = create_resolve_pipeline(device, samples, false, false,
					      &state->resolve_compute.rc[i].pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_resolve_pipeline(device, samples, true, false,
					      &state->resolve_compute.rc[i].i_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_resolve_pipeline(device, samples, false, true,
					      &state->resolve_compute.rc[i].srgb_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_depth_stencil_resolve_pipeline(device, samples,
							    DEPTH_RESOLVE,
							    VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
							    &state->resolve_compute.depth[i].average_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_depth_stencil_resolve_pipeline(device, samples,
							    DEPTH_RESOLVE,
							    VK_RESOLVE_MODE_MAX_BIT_KHR,
							    &state->resolve_compute.depth[i].max_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_depth_stencil_resolve_pipeline(device, samples,
							    DEPTH_RESOLVE,
							    VK_RESOLVE_MODE_MIN_BIT_KHR,
							    &state->resolve_compute.depth[i].min_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_depth_stencil_resolve_pipeline(device, samples,
							    STENCIL_RESOLVE,
							    VK_RESOLVE_MODE_MAX_BIT_KHR,
							    &state->resolve_compute.stencil[i].max_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_depth_stencil_resolve_pipeline(device, samples,
							    STENCIL_RESOLVE,
							    VK_RESOLVE_MODE_MIN_BIT_KHR,
							    &state->resolve_compute.stencil[i].min_pipeline);
		if (res != VK_SUCCESS)
			goto fail;
	}

	res = create_depth_stencil_resolve_pipeline(device, 0,
						    DEPTH_RESOLVE,
						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
						    &state->resolve_compute.depth_zero_pipeline);
	if (res != VK_SUCCESS)
		goto fail;

	res = create_depth_stencil_resolve_pipeline(device, 0,
						    STENCIL_RESOLVE,
						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
						    &state->resolve_compute.stencil_zero_pipeline);
	if (res != VK_SUCCESS)
		goto fail;

	return VK_SUCCESS;
fail:
	radv_device_finish_meta_resolve_compute_state(device);
	return res;
}

void
radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;
	for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
		radv_DestroyPipeline(radv_device_to_handle(device),
				     state->resolve_compute.rc[i].pipeline,
				     &state->alloc);

		radv_DestroyPipeline(radv_device_to_handle(device),
				     state->resolve_compute.rc[i].i_pipeline,
				     &state->alloc);

		radv_DestroyPipeline(radv_device_to_handle(device),
				     state->resolve_compute.rc[i].srgb_pipeline,
				     &state->alloc);

		radv_DestroyPipeline(radv_device_to_handle(device),
				     state->resolve_compute.depth[i].average_pipeline,
				     &state->alloc);

		radv_DestroyPipeline(radv_device_to_handle(device),
				     state->resolve_compute.depth[i].max_pipeline,
				     &state->alloc);

		radv_DestroyPipeline(radv_device_to_handle(device),
				     state->resolve_compute.depth[i].min_pipeline,
				     &state->alloc);

		radv_DestroyPipeline(radv_device_to_handle(device),
				     state->resolve_compute.stencil[i].max_pipeline,
				     &state->alloc);

		radv_DestroyPipeline(radv_device_to_handle(device),
				     state->resolve_compute.stencil[i].min_pipeline,
				     &state->alloc);
	}

	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->resolve_compute.depth_zero_pipeline,
			     &state->alloc);

	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->resolve_compute.stencil_zero_pipeline,
			     &state->alloc);

	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->resolve_compute.ds_layout,
					&state->alloc);
	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->resolve_compute.p_layout,
				   &state->alloc);
}

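/* Picks the int/srgb/float color pipeline matching the source format,
 * creating it on demand. Pipelines are indexed by log2 of the sample count.
 * Returns NULL (and records the error) if pipeline creation fails.
 */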
static VkPipeline *
radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer,
			  struct radv_image_view *src_iview)
{
	struct radv_device *device = cmd_buffer->device;
	struct radv_meta_state *state = &device->meta_state;
	uint32_t samples = src_iview->image->info.samples;
	uint32_t samples_log2 = ffs(samples) - 1;
	VkPipeline *pipeline;

	if (vk_format_is_int(src_iview->vk_format))
		pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
	else if (vk_format_is_srgb(src_iview->vk_format))
		pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
	else
		pipeline = &state->resolve_compute.rc[samples_log2].pipeline;

	if (!*pipeline) {
		VkResult ret;

		ret = create_resolve_pipeline(device, samples,
					      vk_format_is_int(src_iview->vk_format),
					      vk_format_is_srgb(src_iview->vk_format),
					      pipeline);
		if (ret != VK_SUCCESS) {
			cmd_buffer->record_result = ret;
			return NULL;
		}
	}

	return pipeline;
}

static void
emit_resolve(struct radv_cmd_buffer *cmd_buffer,
	     struct radv_image_view *src_iview,
	     struct radv_image_view *dest_iview,
	     const VkOffset2D *src_offset,
	     const VkOffset2D *dest_offset,
	     const VkExtent2D *resolve_extent)
{
	struct radv_device *device = cmd_buffer->device;
	VkPipeline *pipeline;

	radv_meta_push_descriptor_set(cmd_buffer,
				      VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.resolve_compute.p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					{
						.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						.dstBinding = 0,
						.dstArrayElement = 0,
						.descriptorCount = 1,
						.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
						.pImageInfo = (VkDescriptorImageInfo[]) {
							{
								.sampler = VK_NULL_HANDLE,
								.imageView = radv_image_view_to_handle(src_iview),
								.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
							},
						}
					},
					{
						.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						.dstBinding = 1,
						.dstArrayElement = 0,
						.descriptorCount = 1,
						.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
						.pImageInfo = (VkDescriptorImageInfo[]) {
							{
								.sampler = VK_NULL_HANDLE,
								.imageView = radv_image_view_to_handle(dest_iview),
								.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
							},
						}
					}
				      });

	pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
	if (!pipeline)
		return;

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);

	unsigned push_constants[4] = {
		src_offset->x,
		src_offset->y,
		dest_offset->x,
		dest_offset->y,
	};
	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
			      device->meta_state.resolve_compute.p_layout,
			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
			      push_constants);
	radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
}

static void
emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
			   struct radv_image_view *src_iview,
			   struct radv_image_view *dest_iview,
			   const VkOffset2D *src_offset,
			   const VkOffset2D *dest_offset,
			   const VkExtent2D *resolve_extent,
			   VkImageAspectFlags aspects,
			   VkResolveModeFlagBitsKHR resolve_mode)
{
	struct radv_device *device = cmd_buffer->device;
	const uint32_t samples = src_iview->image->info.samples;
	const uint32_t samples_log2 = ffs(samples) - 1;
	VkPipeline *pipeline;

	radv_meta_push_descriptor_set(cmd_buffer,
				      VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.resolve_compute.p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					{
						.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						.dstBinding = 0,
						.dstArrayElement = 0,
						.descriptorCount = 1,
						.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
						.pImageInfo = (VkDescriptorImageInfo[]) {
							{
								.sampler = VK_NULL_HANDLE,
								.imageView = radv_image_view_to_handle(src_iview),
								.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
							},
						}
					},
					{
						.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						.dstBinding = 1,
						.dstArrayElement = 0,
						.descriptorCount = 1,
						.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
						.pImageInfo = (VkDescriptorImageInfo[]) {
							{
								.sampler = VK_NULL_HANDLE,
								.imageView = radv_image_view_to_handle(dest_iview),
								.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
							},
						}
					}
				      });

	switch (resolve_mode) {
	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
			pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
		else
			pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
		break;
	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
		assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
		pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
		break;
	case VK_RESOLVE_MODE_MIN_BIT_KHR:
		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
			pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
		else
			pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
		break;
	case VK_RESOLVE_MODE_MAX_BIT_KHR:
		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
			pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
		else
			pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
		break;
	default:
		unreachable("invalid resolve mode");
	}

	if (!*pipeline) {
		int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
		VkResult ret;

		ret = create_depth_stencil_resolve_pipeline(device, samples,
							    index, resolve_mode,
							    pipeline);
		if (ret != VK_SUCCESS) {
			cmd_buffer->record_result = ret;
			return;
		}
	}

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);

	unsigned push_constants[4] = {
		src_offset->x,
		src_offset->y,
		dest_offset->x,
		dest_offset->y,
	};
	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
			      device->meta_state.resolve_compute.p_layout,
			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
			      push_constants);
	radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
}

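/* Resolves src_image into dest_image with one compute dispatch per layer of
 * each region. The source is decompressed first so the shader reads valid
 * data; the destination view drops any sRGB encoding since the conversion is
 * done in the shader instead.
 */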
void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
				     struct radv_image *src_image,
				     VkFormat src_format,
				     VkImageLayout src_image_layout,
				     struct radv_image *dest_image,
				     VkFormat dest_format,
				     VkImageLayout dest_image_layout,
				     uint32_t region_count,
				     const VkImageResolve *regions)
{
	struct radv_meta_saved_state saved_state;

	radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout,
				    region_count, regions);

	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_COMPUTE_PIPELINE |
		       RADV_META_SAVE_CONSTANTS |
		       RADV_META_SAVE_DESCRIPTORS);

	for (uint32_t r = 0; r < region_count; ++r) {
		const VkImageResolve *region = &regions[r];

		assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
		assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
		assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);

		const uint32_t src_base_layer =
			radv_meta_get_iview_layer(src_image, &region->srcSubresource,
						  &region->srcOffset);

		const uint32_t dest_base_layer =
			radv_meta_get_iview_layer(dest_image, &region->dstSubresource,
						  &region->dstOffset);

		const struct VkExtent3D extent =
			radv_sanitize_image_extent(src_image->type, region->extent);
		const struct VkOffset3D srcOffset =
			radv_sanitize_image_offset(src_image->type, region->srcOffset);
		const struct VkOffset3D dstOffset =
			radv_sanitize_image_offset(dest_image->type, region->dstOffset);

		for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
		     ++layer) {

			struct radv_image_view src_iview;
			radv_image_view_init(&src_iview, cmd_buffer->device,
					     &(VkImageViewCreateInfo) {
						     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
						     .image = radv_image_to_handle(src_image),
						     .viewType = radv_meta_get_view_type(src_image),
						     .format = src_format,
						     .subresourceRange = {
							     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
							     .baseMipLevel = region->srcSubresource.mipLevel,
							     .levelCount = 1,
							     .baseArrayLayer = src_base_layer + layer,
							     .layerCount = 1,
						     },
					     });

			struct radv_image_view dest_iview;
			radv_image_view_init(&dest_iview, cmd_buffer->device,
					     &(VkImageViewCreateInfo) {
						     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
						     .image = radv_image_to_handle(dest_image),
						     .viewType = radv_meta_get_view_type(dest_image),
						     .format = vk_to_non_srgb_format(dest_format),
						     .subresourceRange = {
							     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
							     .baseMipLevel = region->dstSubresource.mipLevel,
							     .levelCount = 1,
							     .baseArrayLayer = dest_base_layer + layer,
							     .layerCount = 1,
						     },
					     });

			emit_resolve(cmd_buffer,
				     &src_iview,
				     &dest_iview,
				     &(VkOffset2D) {srcOffset.x, srcOffset.y },
				     &(VkOffset2D) {dstOffset.x, dstOffset.y },
				     &(VkExtent2D) {extent.width, extent.height });
		}
	}
	radv_meta_restore(&saved_state, cmd_buffer);
}

/**
 * Emit any needed resolves for the current subpass.
 */
void
radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
{
	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
	struct radv_subpass_barrier barrier;
	uint32_t layer_count = fb->layers;

	if (subpass->view_mask)
		layer_count = util_last_bit(subpass->view_mask);

	/* Resolves happen before the end-of-subpass barriers get executed, so
	 * we have to make the attachment shader-readable.
	 */
	barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
	barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
	radv_subpass_barrier(cmd_buffer, &barrier);

	for (uint32_t i = 0; i < subpass->color_count; ++i) {
		struct radv_subpass_attachment src_att = subpass->color_attachments[i];
		struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];

		if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
			continue;

		struct radv_image_view *src_iview = fb->attachments[src_att.attachment].attachment;
		struct radv_image_view *dst_iview = fb->attachments[dst_att.attachment].attachment;

		VkImageResolve region = {
			.extent = (VkExtent3D){ fb->width, fb->height, 0 },
			.srcSubresource = (VkImageSubresourceLayers) {
				.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
				.mipLevel = src_iview->base_mip,
				.baseArrayLayer = src_iview->base_layer,
				.layerCount = layer_count,
			},
			.dstSubresource = (VkImageSubresourceLayers) {
				.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
				.mipLevel = dst_iview->base_mip,
				.baseArrayLayer = dst_iview->base_layer,
				.layerCount = layer_count,
			},
			.srcOffset = (VkOffset3D){ 0, 0, 0 },
			.dstOffset = (VkOffset3D){ 0, 0, 0 },
		};

		radv_meta_resolve_compute_image(cmd_buffer,
						src_iview->image,
						src_iview->vk_format,
						src_att.layout,
						dst_iview->image,
						dst_iview->vk_format,
						dst_att.layout,
						1, &region);
	}

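	/* Make the resolve writes visible: wait for the compute shader and
	 * invalidate the vector caches before anything consumes the
	 * destination.
	 */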
	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
					RADV_CMD_FLAG_INV_VMEM_L1;
}

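/* Resolves the current subpass' depth/stencil attachment with the requested
 * VK_KHR_depth_stencil_resolve mode, one layer at a time. Because the
 * compute path writes the destination directly, its HTILE metadata has to be
 * reinitialized afterwards.
 */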
void
radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
				      VkImageAspectFlags aspects,
				      VkResolveModeFlagBitsKHR resolve_mode)
{
	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
	struct radv_meta_saved_state saved_state;
	struct radv_subpass_barrier barrier;
	uint32_t layer_count = fb->layers;

	if (subpass->view_mask)
		layer_count = util_last_bit(subpass->view_mask);

	/* Resolves happen before the end-of-subpass barriers get executed, so
	 * we have to make the attachment shader-readable.
	 */
	barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
	barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
	radv_subpass_barrier(cmd_buffer, &barrier);

	radv_decompress_resolve_subpass_src(cmd_buffer);

	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_COMPUTE_PIPELINE |
		       RADV_META_SAVE_CONSTANTS |
		       RADV_META_SAVE_DESCRIPTORS);

	struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
	struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;

	struct radv_image_view *src_iview =
		cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
	struct radv_image_view *dst_iview =
		cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment;

	struct radv_image *src_image = src_iview->image;
	struct radv_image *dst_image = dst_iview->image;

	for (uint32_t layer = 0; layer < layer_count; layer++) {
		struct radv_image_view tsrc_iview;
		radv_image_view_init(&tsrc_iview, cmd_buffer->device,
				     &(VkImageViewCreateInfo) {
					     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
					     .image = radv_image_to_handle(src_image),
					     .viewType = radv_meta_get_view_type(src_image),
					     .format = src_iview->vk_format,
					     .subresourceRange = {
						     .aspectMask = aspects,
						     .baseMipLevel = src_iview->base_mip,
						     .levelCount = 1,
						     .baseArrayLayer = src_iview->base_layer + layer,
						     .layerCount = 1,
					     },
				     });

		struct radv_image_view tdst_iview;
		radv_image_view_init(&tdst_iview, cmd_buffer->device,
				     &(VkImageViewCreateInfo) {
					     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
					     .image = radv_image_to_handle(dst_image),
					     .viewType = radv_meta_get_view_type(dst_image),
					     .format = dst_iview->vk_format,
					     .subresourceRange = {
						     .aspectMask = aspects,
						     .baseMipLevel = dst_iview->base_mip,
						     .levelCount = 1,
						     .baseArrayLayer = dst_iview->base_layer + layer,
						     .layerCount = 1,
					     },
				     });

		emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
					   &(VkOffset2D) { 0, 0 },
					   &(VkOffset2D) { 0, 0 },
					   &(VkExtent2D) { fb->width, fb->height },
					   aspects,
					   resolve_mode);
	}

	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
					RADV_CMD_FLAG_INV_VMEM_L1;

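	/* The compute path stored depth with plain image writes, bypassing
	 * HTILE, so the destination's HTILE metadata is reinitialized here to
	 * keep later depth accesses consistent with what is in memory.
	 */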
	if (radv_image_has_htile(dst_image)) {
		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
			VkImageSubresourceRange range = {};
			range.aspectMask = aspects;
			range.baseMipLevel = dst_iview->base_mip;
			range.levelCount = 1;
			range.baseArrayLayer = dst_iview->base_layer;
			range.layerCount = layer_count;

			uint32_t clear_value = 0xfffc000f;

			if (vk_format_is_stencil(dst_image->vk_format) &&
			    subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
				/* Only clear the stencil part of the HTILE
				 * buffer if it's resolved, otherwise this
				 * might break if the stencil has been cleared.
				 */
				clear_value = 0xfffff30f;
			}

			cmd_buffer->state.flush_bits |=
				radv_clear_htile(cmd_buffer, dst_image, &range,
						 clear_value);
		}
	}

	radv_meta_restore(&saved_state, cmd_buffer);
}