radv: update VK_KHR_depth_stencil_resolve for Vulkan 1.2
[mesa.git] / src / amd / vulkan / radv_meta_resolve_cs.c
1 /*
2 * Copyright © 2016 Dave Airlie
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24
25 #include <assert.h>
26 #include <stdbool.h>
27
28 #include "radv_meta.h"
29 #include "radv_private.h"
30 #include "nir/nir_builder.h"
31 #include "sid.h"
32 #include "vk_format.h"
33
34 static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
35 nir_ssa_def *input)
36 {
37 unsigned i;
38
39 nir_ssa_def *cmp[3];
40 for (i = 0; i < 3; i++)
41 cmp[i] = nir_flt(b, nir_channel(b, input, i),
42 nir_imm_int(b, 0x3b4d2e1c));
43
44 nir_ssa_def *ltvals[3];
45 for (i = 0; i < 3; i++)
46 ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
47 nir_imm_float(b, 12.92));
48
49 nir_ssa_def *gtvals[3];
50
51 for (i = 0; i < 3; i++) {
52 gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
53 nir_imm_float(b, 1.0/2.4));
54 gtvals[i] = nir_fmul(b, gtvals[i],
55 nir_imm_float(b, 1.055));
56 gtvals[i] = nir_fsub(b, gtvals[i],
57 nir_imm_float(b, 0.055));
58 }
59
60 nir_ssa_def *comp[4];
61 for (i = 0; i < 3; i++)
62 comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
63 comp[3] = nir_channels(b, input, 1 << 3);
64 return nir_vec(b, comp, 4);
65 }
66
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
	/* Build a compute shader that resolves one pixel per invocation from
	 * a multisampled sampled image (set 0, binding 0) into a
	 * single-sampled storage image (set 0, binding 1).
	 *
	 * Push constants (16 bytes): ivec2 src offset at byte 0 and ivec2
	 * dst offset at byte 8; each is added to the global invocation id to
	 * form the source/destination texel coordinate.
	 */
	nir_builder b;
	char name[64];
	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : (is_srgb ? "srgb" : "float"));
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, name);
	/* 16x16 workgroup; the dispatch is emitted by radv_unaligned_dispatch(). */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;
	/* global_id = workgroup_id * workgroup_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Load the two ivec2 offsets from the 16-byte push-constant range. */
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 16);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 16);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	dst_offset->num_components = 2;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	/* Only x/y are needed for the 2D fetch (mask 0x3). */
	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
	nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

	/* Shared helper (defined elsewhere) writes the resolved sample value
	 * into 'color'.
	 */
	radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
					    color, img_coord);

	nir_ssa_def *outval = nir_load_var(&b, color);
	if (is_srgb)
		outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

	/* Store the resolved texel to the output storage image. */
	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); /* sample index: unused for 2D */
	store->src[3] = nir_src_for_ssa(outval);
	store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
142
/* Selects which aspect a depth/stencil resolve shader operates on:
 * depth resolves use float math, stencil resolves unsigned-int math.
 */
enum {
	DEPTH_RESOLVE,
	STENCIL_RESOLVE,
};
147
148 static const char *
149 get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
150 {
151 switch (resolve_mode) {
152 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
153 return "zero";
154 case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
155 return "average";
156 case VK_RESOLVE_MODE_MIN_BIT_KHR:
157 return "min";
158 case VK_RESOLVE_MODE_MAX_BIT_KHR:
159 return "max";
160 default:
161 unreachable("invalid resolve mode");
162 }
163 }
164
static nir_shader *
build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
					   int index,
					   VkResolveModeFlagBits resolve_mode)
{
	/* Build a compute shader resolving one depth or stencil texel per
	 * invocation, according to resolve_mode:
	 *   - SAMPLE_ZERO: fetch sample 0 only,
	 *   - AVERAGE (depth only): mean of all samples,
	 *   - MIN/MAX: fold all samples with min/max.
	 *
	 * 'index' selects DEPTH_RESOLVE (float math) or STENCIL_RESOLVE
	 * (unsigned-int math).  Descriptor bindings and push constants match
	 * build_resolve_compute_shader().
	 */
	nir_builder b;
	char name[64];
	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	snprintf(name, 64, "meta_resolve_cs_%s-%s-%d",
		 index == DEPTH_RESOLVE ? "depth" : "stencil",
		 get_resolve_mode_str(resolve_mode), samples);

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, name);
	/* 16x16 workgroup; dispatched by radv_unaligned_dispatch(). */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;
	/* global_id = workgroup_id * workgroup_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Load the two ivec2 offsets from the 16-byte push-constant range. */
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 16);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 16);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	dst_offset->num_components = 2;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	/* Only x/y are needed for the 2D fetch (mask 0x3). */
	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);

	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float : nir_type_uint;

	/* Fetch sample 0 unconditionally; it seeds the fold below and is the
	 * whole result for SAMPLE_ZERO mode.
	 */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
	tex->op = nir_texop_txf_ms;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(img_coord);
	tex->src[1].src_type = nir_tex_src_ms_index;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = type;
	tex->is_array = false;
	tex->coord_components = 2;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	nir_ssa_def *outval = &tex->dest.ssa;

	if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
		/* Fetch the remaining samples and fold them in. */
		for (int i = 1; i < samples; i++) {
			nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
			tex_add->op = nir_texop_txf_ms;
			tex_add->src[0].src_type = nir_tex_src_coord;
			tex_add->src[0].src = nir_src_for_ssa(img_coord);
			tex_add->src[1].src_type = nir_tex_src_ms_index;
			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
			tex_add->src[2].src_type = nir_tex_src_texture_deref;
			tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
			tex_add->dest_type = type;
			tex_add->is_array = false;
			tex_add->coord_components = 2;

			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
			nir_builder_instr_insert(&b, &tex_add->instr);

			switch (resolve_mode) {
			case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
				/* Averaging is only defined for depth. */
				assert(index == DEPTH_RESOLVE);
				outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
				break;
			case VK_RESOLVE_MODE_MIN_BIT_KHR:
				if (index == DEPTH_RESOLVE)
					outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
				else
					outval = nir_umin(&b, outval, &tex_add->dest.ssa);
				break;
			case VK_RESOLVE_MODE_MAX_BIT_KHR:
				if (index == DEPTH_RESOLVE)
					outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
				else
					outval = nir_umax(&b, outval, &tex_add->dest.ssa);
				break;
			default:
				unreachable("invalid resolve mode");
			}
		}

		/* Turn the accumulated sum into a mean. */
		if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
			outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
	}

	/* Store the resolved texel to the output storage image. */
	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); /* sample index: unused for 2D */
	store->src[3] = nir_src_for_ssa(outval);
	store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
303
304 static VkResult
305 create_layout(struct radv_device *device)
306 {
307 VkResult result;
308 /*
309 * two descriptors one for the image being sampled
310 * one for the buffer being written.
311 */
312 VkDescriptorSetLayoutCreateInfo ds_create_info = {
313 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
314 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
315 .bindingCount = 2,
316 .pBindings = (VkDescriptorSetLayoutBinding[]) {
317 {
318 .binding = 0,
319 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
320 .descriptorCount = 1,
321 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
322 .pImmutableSamplers = NULL
323 },
324 {
325 .binding = 1,
326 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
327 .descriptorCount = 1,
328 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
329 .pImmutableSamplers = NULL
330 },
331 }
332 };
333
334 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
335 &ds_create_info,
336 &device->meta_state.alloc,
337 &device->meta_state.resolve_compute.ds_layout);
338 if (result != VK_SUCCESS)
339 goto fail;
340
341
342 VkPipelineLayoutCreateInfo pl_create_info = {
343 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
344 .setLayoutCount = 1,
345 .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
346 .pushConstantRangeCount = 1,
347 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
348 };
349
350 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
351 &pl_create_info,
352 &device->meta_state.alloc,
353 &device->meta_state.resolve_compute.p_layout);
354 if (result != VK_SUCCESS)
355 goto fail;
356 return VK_SUCCESS;
357 fail:
358 return result;
359 }
360
361 static VkResult
362 create_resolve_pipeline(struct radv_device *device,
363 int samples,
364 bool is_integer,
365 bool is_srgb,
366 VkPipeline *pipeline)
367 {
368 VkResult result;
369 struct radv_shader_module cs = { .nir = NULL };
370
371 mtx_lock(&device->meta_state.mtx);
372 if (*pipeline) {
373 mtx_unlock(&device->meta_state.mtx);
374 return VK_SUCCESS;
375 }
376
377 cs.nir = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
378
379 /* compute shader */
380
381 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
382 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
383 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
384 .module = radv_shader_module_to_handle(&cs),
385 .pName = "main",
386 .pSpecializationInfo = NULL,
387 };
388
389 VkComputePipelineCreateInfo vk_pipeline_info = {
390 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
391 .stage = pipeline_shader_stage,
392 .flags = 0,
393 .layout = device->meta_state.resolve_compute.p_layout,
394 };
395
396 result = radv_CreateComputePipelines(radv_device_to_handle(device),
397 radv_pipeline_cache_to_handle(&device->meta_state.cache),
398 1, &vk_pipeline_info, NULL,
399 pipeline);
400 if (result != VK_SUCCESS)
401 goto fail;
402
403 ralloc_free(cs.nir);
404 mtx_unlock(&device->meta_state.mtx);
405 return VK_SUCCESS;
406 fail:
407 ralloc_free(cs.nir);
408 mtx_unlock(&device->meta_state.mtx);
409 return result;
410 }
411
412 static VkResult
413 create_depth_stencil_resolve_pipeline(struct radv_device *device,
414 int samples,
415 int index,
416 VkResolveModeFlagBits resolve_mode,
417 VkPipeline *pipeline)
418 {
419 VkResult result;
420 struct radv_shader_module cs = { .nir = NULL };
421
422 mtx_lock(&device->meta_state.mtx);
423 if (*pipeline) {
424 mtx_unlock(&device->meta_state.mtx);
425 return VK_SUCCESS;
426 }
427
428 cs.nir = build_depth_stencil_resolve_compute_shader(device, samples,
429 index, resolve_mode);
430
431 /* compute shader */
432 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
433 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
434 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
435 .module = radv_shader_module_to_handle(&cs),
436 .pName = "main",
437 .pSpecializationInfo = NULL,
438 };
439
440 VkComputePipelineCreateInfo vk_pipeline_info = {
441 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
442 .stage = pipeline_shader_stage,
443 .flags = 0,
444 .layout = device->meta_state.resolve_compute.p_layout,
445 };
446
447 result = radv_CreateComputePipelines(radv_device_to_handle(device),
448 radv_pipeline_cache_to_handle(&device->meta_state.cache),
449 1, &vk_pipeline_info, NULL,
450 pipeline);
451 if (result != VK_SUCCESS)
452 goto fail;
453
454 ralloc_free(cs.nir);
455 mtx_unlock(&device->meta_state.mtx);
456 return VK_SUCCESS;
457 fail:
458 ralloc_free(cs.nir);
459 mtx_unlock(&device->meta_state.mtx);
460 return result;
461 }
462
VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
	/* Create the layouts and (unless on_demand) every compute-resolve
	 * pipeline up front: per sample count, the three color variants
	 * (float/int/srgb) and the depth/stencil average/min/max variants,
	 * plus the sample-count-independent SAMPLE_ZERO pipelines.
	 *
	 * On any failure, everything created so far is torn down and the
	 * failing VkResult is returned.
	 */
	struct radv_meta_state *state = &device->meta_state;
	VkResult res;

	res = create_layout(device);
	if (res != VK_SUCCESS)
		goto fail;

	/* With on-demand compilation the pipelines are built lazily at
	 * first use (see radv_get_resolve_pipeline() / emit paths).
	 */
	if (on_demand)
		return VK_SUCCESS;

	for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
		uint32_t samples = 1 << i;

		res = create_resolve_pipeline(device, samples, false, false,
					      &state->resolve_compute.rc[i].pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_resolve_pipeline(device, samples, true, false,
					      &state->resolve_compute.rc[i].i_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_resolve_pipeline(device, samples, false, true,
					      &state->resolve_compute.rc[i].srgb_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_depth_stencil_resolve_pipeline(device, samples,
							    DEPTH_RESOLVE,
							    VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
							    &state->resolve_compute.depth[i].average_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_depth_stencil_resolve_pipeline(device, samples,
							    DEPTH_RESOLVE,
							    VK_RESOLVE_MODE_MAX_BIT_KHR,
							    &state->resolve_compute.depth[i].max_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_depth_stencil_resolve_pipeline(device, samples,
							    DEPTH_RESOLVE,
							    VK_RESOLVE_MODE_MIN_BIT_KHR,
							    &state->resolve_compute.depth[i].min_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		/* Stencil has no "average" mode, only min/max (and the
		 * sample-zero pipeline created below).
		 */
		res = create_depth_stencil_resolve_pipeline(device, samples,
							    STENCIL_RESOLVE,
							    VK_RESOLVE_MODE_MAX_BIT_KHR,
							    &state->resolve_compute.stencil[i].max_pipeline);
		if (res != VK_SUCCESS)
			goto fail;

		res = create_depth_stencil_resolve_pipeline(device, samples,
							    STENCIL_RESOLVE,
							    VK_RESOLVE_MODE_MIN_BIT_KHR,
							    &state->resolve_compute.stencil[i].min_pipeline);
		if (res != VK_SUCCESS)
			goto fail;
	}

	/* SAMPLE_ZERO reads only sample 0, so one pipeline per aspect
	 * suffices regardless of sample count (samples argument is 0).
	 */
	res = create_depth_stencil_resolve_pipeline(device, 0,
						    DEPTH_RESOLVE,
						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
						    &state->resolve_compute.depth_zero_pipeline);
	if (res != VK_SUCCESS)
		goto fail;

	res = create_depth_stencil_resolve_pipeline(device, 0,
						    STENCIL_RESOLVE,
						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
						    &state->resolve_compute.stencil_zero_pipeline);
	if (res != VK_SUCCESS)
		goto fail;

	return VK_SUCCESS;
fail:
	radv_device_finish_meta_resolve_compute_state(device);
	return res;
}
549
550 void
551 radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
552 {
553 struct radv_meta_state *state = &device->meta_state;
554 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
555 radv_DestroyPipeline(radv_device_to_handle(device),
556 state->resolve_compute.rc[i].pipeline,
557 &state->alloc);
558
559 radv_DestroyPipeline(radv_device_to_handle(device),
560 state->resolve_compute.rc[i].i_pipeline,
561 &state->alloc);
562
563 radv_DestroyPipeline(radv_device_to_handle(device),
564 state->resolve_compute.rc[i].srgb_pipeline,
565 &state->alloc);
566
567 radv_DestroyPipeline(radv_device_to_handle(device),
568 state->resolve_compute.depth[i].average_pipeline,
569 &state->alloc);
570
571 radv_DestroyPipeline(radv_device_to_handle(device),
572 state->resolve_compute.depth[i].max_pipeline,
573 &state->alloc);
574
575 radv_DestroyPipeline(radv_device_to_handle(device),
576 state->resolve_compute.depth[i].min_pipeline,
577 &state->alloc);
578
579 radv_DestroyPipeline(radv_device_to_handle(device),
580 state->resolve_compute.stencil[i].max_pipeline,
581 &state->alloc);
582
583 radv_DestroyPipeline(radv_device_to_handle(device),
584 state->resolve_compute.stencil[i].min_pipeline,
585 &state->alloc);
586 }
587
588 radv_DestroyPipeline(radv_device_to_handle(device),
589 state->resolve_compute.depth_zero_pipeline,
590 &state->alloc);
591
592 radv_DestroyPipeline(radv_device_to_handle(device),
593 state->resolve_compute.stencil_zero_pipeline,
594 &state->alloc);
595
596 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
597 state->resolve_compute.ds_layout,
598 &state->alloc);
599 radv_DestroyPipelineLayout(radv_device_to_handle(device),
600 state->resolve_compute.p_layout,
601 &state->alloc);
602 }
603
604 static VkPipeline *
605 radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer,
606 struct radv_image_view *src_iview)
607 {
608 struct radv_device *device = cmd_buffer->device;
609 struct radv_meta_state *state = &device->meta_state;
610 uint32_t samples = src_iview->image->info.samples;
611 uint32_t samples_log2 = ffs(samples) - 1;
612 VkPipeline *pipeline;
613
614 if (vk_format_is_int(src_iview->vk_format))
615 pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
616 else if (vk_format_is_srgb(src_iview->vk_format))
617 pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
618 else
619 pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
620
621 if (!*pipeline) {
622 VkResult ret;
623
624 ret = create_resolve_pipeline(device, samples,
625 vk_format_is_int(src_iview->vk_format),
626 vk_format_is_srgb(src_iview->vk_format),
627 pipeline);
628 if (ret != VK_SUCCESS) {
629 cmd_buffer->record_result = ret;
630 return NULL;
631 }
632 }
633
634 return pipeline;
635 }
636
637 static void
638 emit_resolve(struct radv_cmd_buffer *cmd_buffer,
639 struct radv_image_view *src_iview,
640 struct radv_image_view *dest_iview,
641 const VkOffset2D *src_offset,
642 const VkOffset2D *dest_offset,
643 const VkExtent2D *resolve_extent)
644 {
645 struct radv_device *device = cmd_buffer->device;
646 VkPipeline *pipeline;
647
648 radv_meta_push_descriptor_set(cmd_buffer,
649 VK_PIPELINE_BIND_POINT_COMPUTE,
650 device->meta_state.resolve_compute.p_layout,
651 0, /* set */
652 2, /* descriptorWriteCount */
653 (VkWriteDescriptorSet[]) {
654 {
655 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
656 .dstBinding = 0,
657 .dstArrayElement = 0,
658 .descriptorCount = 1,
659 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
660 .pImageInfo = (VkDescriptorImageInfo[]) {
661 {
662 .sampler = VK_NULL_HANDLE,
663 .imageView = radv_image_view_to_handle(src_iview),
664 .imageLayout = VK_IMAGE_LAYOUT_GENERAL },
665 }
666 },
667 {
668 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
669 .dstBinding = 1,
670 .dstArrayElement = 0,
671 .descriptorCount = 1,
672 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
673 .pImageInfo = (VkDescriptorImageInfo[]) {
674 {
675 .sampler = VK_NULL_HANDLE,
676 .imageView = radv_image_view_to_handle(dest_iview),
677 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
678 },
679 }
680 }
681 });
682
683 pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
684
685 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
686 VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
687
688 unsigned push_constants[4] = {
689 src_offset->x,
690 src_offset->y,
691 dest_offset->x,
692 dest_offset->y,
693 };
694 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
695 device->meta_state.resolve_compute.p_layout,
696 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
697 push_constants);
698 radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
699
700 }
701
static void
emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
			   struct radv_image_view *src_iview,
			   struct radv_image_view *dest_iview,
			   const VkOffset2D *src_offset,
			   const VkOffset2D *dest_offset,
			   const VkExtent2D *resolve_extent,
			   VkImageAspectFlags aspects,
			   VkResolveModeFlagBits resolve_mode)
{
	/* Push the src/dst image descriptors, pick (and lazily create) the
	 * depth or stencil resolve pipeline matching 'aspects' and
	 * 'resolve_mode', then dispatch one compute resolve of
	 * resolve_extent texels.  Returns early (with the error recorded in
	 * cmd_buffer->record_result) if pipeline creation fails.
	 */
	struct radv_device *device = cmd_buffer->device;
	const uint32_t samples = src_iview->image->info.samples;
	const uint32_t samples_log2 = ffs(samples) - 1;
	VkPipeline *pipeline;

	radv_meta_push_descriptor_set(cmd_buffer,
				      VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.resolve_compute.p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
	(VkWriteDescriptorSet[]) {
		{
			.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
			.dstBinding = 0,
			.dstArrayElement = 0,
			.descriptorCount = 1,
			.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
			.pImageInfo = (VkDescriptorImageInfo[]) {
				{
					.sampler = VK_NULL_HANDLE,
					.imageView = radv_image_view_to_handle(src_iview),
					.imageLayout = VK_IMAGE_LAYOUT_GENERAL },
			}
		},
		{
			.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
			.dstBinding = 1,
			.dstArrayElement = 0,
			.descriptorCount = 1,
			.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
			.pImageInfo = (VkDescriptorImageInfo[]) {
				{
					.sampler = VK_NULL_HANDLE,
					.imageView = radv_image_view_to_handle(dest_iview),
					.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
				},
			}
		}
	});

	/* Map (mode, aspect) to the cached pipeline slot.  SAMPLE_ZERO is
	 * sample-count independent; the others are indexed by samples_log2.
	 */
	switch (resolve_mode) {
	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
			pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
		else
			pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
		break;
	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
		/* Averaging is only valid for depth. */
		assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
		pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
		break;
	case VK_RESOLVE_MODE_MIN_BIT_KHR:
		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
			pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
		else
			pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
		break;
	case VK_RESOLVE_MODE_MAX_BIT_KHR:
		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
			pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
		else
			pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
		break;
	default:
		unreachable("invalid resolve mode");
	}

	/* On-demand path: build the pipeline at first use. */
	if (!*pipeline) {
		int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
		VkResult ret;

		ret = create_depth_stencil_resolve_pipeline(device, samples,
							    index, resolve_mode,
							    pipeline);
		if (ret != VK_SUCCESS) {
			cmd_buffer->record_result = ret;
			return;
		}
	}

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);

	/* Push constants consumed by the shader: ivec2 src offset at byte 0,
	 * ivec2 dst offset at byte 8.
	 */
	unsigned push_constants[4] = {
		src_offset->x,
		src_offset->y,
		dest_offset->x,
		dest_offset->y,
	};
	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
			      device->meta_state.resolve_compute.p_layout,
			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
			      push_constants);
	radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);

}
808
/* Resolve 'src_image' (multisampled color) into 'dest_image' with the
 * compute path, one emit_resolve() dispatch per region layer.  Saves and
 * restores the command buffer's compute pipeline, push constants and
 * descriptors around the operation.
 */
void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
				     struct radv_image *src_image,
				     VkFormat src_format,
				     VkImageLayout src_image_layout,
				     struct radv_image *dest_image,
				     VkFormat dest_format,
				     VkImageLayout dest_image_layout,
				     uint32_t region_count,
				     const VkImageResolve *regions)
{
	struct radv_meta_saved_state saved_state;

	/* Make sure the source is decompressed before it is sampled. */
	radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout,
				    region_count, regions);

	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_COMPUTE_PIPELINE |
		       RADV_META_SAVE_CONSTANTS |
		       RADV_META_SAVE_DESCRIPTORS);

	for (uint32_t r = 0; r < region_count; ++r) {
		const VkImageResolve *region = &regions[r];

		/* The compute color path only handles color aspects. */
		assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
		assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
		assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);

		const uint32_t src_base_layer =
			radv_meta_get_iview_layer(src_image, &region->srcSubresource,
						  &region->srcOffset);

		const uint32_t dest_base_layer =
			radv_meta_get_iview_layer(dest_image, &region->dstSubresource,
						  &region->dstOffset);

		const struct VkExtent3D extent =
			radv_sanitize_image_extent(src_image->type, region->extent);
		const struct VkOffset3D srcOffset =
			radv_sanitize_image_offset(src_image->type, region->srcOffset);
		const struct VkOffset3D dstOffset =
			radv_sanitize_image_offset(dest_image->type, region->dstOffset);

		/* One single-layer view pair and one dispatch per layer. */
		for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
		     ++layer) {

			struct radv_image_view src_iview;
			radv_image_view_init(&src_iview, cmd_buffer->device,
					     &(VkImageViewCreateInfo) {
						     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
						     .image = radv_image_to_handle(src_image),
						     .viewType = radv_meta_get_view_type(src_image),
						     .format = src_format,
						     .subresourceRange = {
							     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
							     .baseMipLevel = region->srcSubresource.mipLevel,
							     .levelCount = 1,
							     .baseArrayLayer = src_base_layer + layer,
							     .layerCount = 1,
						     },
					     }, NULL);

			/* The shader applies the sRGB encode itself (see
			 * build_resolve_compute_shader), so the destination is
			 * viewed with the non-sRGB format to avoid a second
			 * conversion on store.
			 */
			struct radv_image_view dest_iview;
			radv_image_view_init(&dest_iview, cmd_buffer->device,
					     &(VkImageViewCreateInfo) {
						     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
						     .image = radv_image_to_handle(dest_image),
						     .viewType = radv_meta_get_view_type(dest_image),
						     .format = vk_to_non_srgb_format(dest_format),
						     .subresourceRange = {
							     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
							     .baseMipLevel = region->dstSubresource.mipLevel,
							     .levelCount = 1,
							     .baseArrayLayer = dest_base_layer + layer,
							     .layerCount = 1,
						     },
					     }, NULL);

			emit_resolve(cmd_buffer,
				     &src_iview,
				     &dest_iview,
				     &(VkOffset2D) {srcOffset.x, srcOffset.y },
				     &(VkOffset2D) {dstOffset.x, dstOffset.y },
				     &(VkExtent2D) {extent.width, extent.height });
		}
	}
	radv_meta_restore(&saved_state, cmd_buffer);
}
896
897 /**
898 * Emit any needed resolves for the current subpass.
899 */
900 void
901 radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
902 {
903 struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
904 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
905 struct radv_subpass_barrier barrier;
906 uint32_t layer_count = fb->layers;
907
908 if (subpass->view_mask)
909 layer_count = util_last_bit(subpass->view_mask);
910
911 /* Resolves happen before the end-of-subpass barriers get executed, so
912 * we have to make the attachment shader-readable.
913 */
914 barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
915 barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
916 barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
917 radv_subpass_barrier(cmd_buffer, &barrier);
918
919 for (uint32_t i = 0; i < subpass->color_count; ++i) {
920 struct radv_subpass_attachment src_att = subpass->color_attachments[i];
921 struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];
922
923 if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
924 continue;
925
926 struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
927 struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
928
929 VkImageResolve region = {
930 .extent = (VkExtent3D){ fb->width, fb->height, 0 },
931 .srcSubresource = (VkImageSubresourceLayers) {
932 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
933 .mipLevel = src_iview->base_mip,
934 .baseArrayLayer = src_iview->base_layer,
935 .layerCount = layer_count,
936 },
937 .dstSubresource = (VkImageSubresourceLayers) {
938 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
939 .mipLevel = dst_iview->base_mip,
940 .baseArrayLayer = dst_iview->base_layer,
941 .layerCount = layer_count,
942 },
943 .srcOffset = (VkOffset3D){ 0, 0, 0 },
944 .dstOffset = (VkOffset3D){ 0, 0, 0 },
945 };
946
947 radv_meta_resolve_compute_image(cmd_buffer,
948 src_iview->image,
949 src_iview->vk_format,
950 src_att.layout,
951 dst_iview->image,
952 dst_iview->vk_format,
953 dst_att.layout,
954 1, &region);
955 }
956
957 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
958 RADV_CMD_FLAG_INV_VCACHE;
959 }
960
961 void
962 radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
963 VkImageAspectFlags aspects,
964 VkResolveModeFlagBits resolve_mode)
965 {
966 struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
967 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
968 struct radv_meta_saved_state saved_state;
969 struct radv_subpass_barrier barrier;
970 uint32_t layer_count = fb->layers;
971
972 if (subpass->view_mask)
973 layer_count = util_last_bit(subpass->view_mask);
974
975 /* Resolves happen before the end-of-subpass barriers get executed, so
976 * we have to make the attachment shader-readable.
977 */
978 barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
979 barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
980 barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
981 radv_subpass_barrier(cmd_buffer, &barrier);
982
983 radv_decompress_resolve_subpass_src(cmd_buffer);
984
985 radv_meta_save(&saved_state, cmd_buffer,
986 RADV_META_SAVE_COMPUTE_PIPELINE |
987 RADV_META_SAVE_CONSTANTS |
988 RADV_META_SAVE_DESCRIPTORS);
989
990 struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
991 struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
992
993 struct radv_image_view *src_iview =
994 cmd_buffer->state.attachments[src_att.attachment].iview;
995 struct radv_image_view *dst_iview =
996 cmd_buffer->state.attachments[dest_att.attachment].iview;
997
998 struct radv_image *src_image = src_iview->image;
999 struct radv_image *dst_image = dst_iview->image;
1000
1001 for (uint32_t layer = 0; layer < layer_count; layer++) {
1002 struct radv_image_view tsrc_iview;
1003 radv_image_view_init(&tsrc_iview, cmd_buffer->device,
1004 &(VkImageViewCreateInfo) {
1005 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1006 .image = radv_image_to_handle(src_image),
1007 .viewType = radv_meta_get_view_type(src_image),
1008 .format = src_iview->vk_format,
1009 .subresourceRange = {
1010 .aspectMask = aspects,
1011 .baseMipLevel = src_iview->base_mip,
1012 .levelCount = 1,
1013 .baseArrayLayer = src_iview->base_layer + layer,
1014 .layerCount = 1,
1015 },
1016 }, NULL);
1017
1018 struct radv_image_view tdst_iview;
1019 radv_image_view_init(&tdst_iview, cmd_buffer->device,
1020 &(VkImageViewCreateInfo) {
1021 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1022 .image = radv_image_to_handle(dst_image),
1023 .viewType = radv_meta_get_view_type(dst_image),
1024 .format = dst_iview->vk_format,
1025 .subresourceRange = {
1026 .aspectMask = aspects,
1027 .baseMipLevel = dst_iview->base_mip,
1028 .levelCount = 1,
1029 .baseArrayLayer = dst_iview->base_layer + layer,
1030 .layerCount = 1,
1031 },
1032 }, NULL);
1033
1034 emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
1035 &(VkOffset2D) { 0, 0 },
1036 &(VkOffset2D) { 0, 0 },
1037 &(VkExtent2D) { fb->width, fb->height },
1038 aspects,
1039 resolve_mode);
1040 }
1041
1042 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
1043 RADV_CMD_FLAG_INV_VCACHE;
1044
1045 if (radv_image_has_htile(dst_image)) {
1046 if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
1047 VkImageSubresourceRange range = {};
1048 range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
1049 range.baseMipLevel = dst_iview->base_mip;
1050 range.levelCount = 1;
1051 range.baseArrayLayer = dst_iview->base_layer;
1052 range.layerCount = layer_count;
1053
1054 uint32_t clear_value = 0xfffc000f;
1055
1056 if (vk_format_is_stencil(dst_image->vk_format) &&
1057 subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
1058 /* Only clear the stencil part of the HTILE
1059 * buffer if it's resolved, otherwise this
1060 * might break if the stencil has been cleared.
1061 */
1062 clear_value = 0xfffff30f;
1063 }
1064
1065 cmd_buffer->state.flush_bits |=
1066 radv_clear_htile(cmd_buffer, dst_image, &range,
1067 clear_value);
1068 }
1069 }
1070
1071 radv_meta_restore(&saved_state, cmd_buffer);
1072 }