radv: fix image variable types in meta shaders
[mesa.git] / src / amd / vulkan / radv_meta_resolve_cs.c
1 /*
2 * Copyright © 2016 Dave Airlie
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24
25 #include <assert.h>
26 #include <stdbool.h>
27
28 #include "radv_meta.h"
29 #include "radv_private.h"
30 #include "nir/nir_builder.h"
31 #include "sid.h"
32 #include "vk_format.h"
33
34 static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
35 nir_ssa_def *input)
36 {
37 unsigned i;
38
39 nir_ssa_def *cmp[3];
40 for (i = 0; i < 3; i++)
41 cmp[i] = nir_flt(b, nir_channel(b, input, i),
42 nir_imm_int(b, 0x3b4d2e1c));
43
44 nir_ssa_def *ltvals[3];
45 for (i = 0; i < 3; i++)
46 ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
47 nir_imm_float(b, 12.92));
48
49 nir_ssa_def *gtvals[3];
50
51 for (i = 0; i < 3; i++) {
52 gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
53 nir_imm_float(b, 1.0/2.4));
54 gtvals[i] = nir_fmul(b, gtvals[i],
55 nir_imm_float(b, 1.055));
56 gtvals[i] = nir_fsub(b, gtvals[i],
57 nir_imm_float(b, 0.055));
58 }
59
60 nir_ssa_def *comp[4];
61 for (i = 0; i < 3; i++)
62 comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
63 comp[3] = nir_channels(b, input, 1 << 3);
64 return nir_vec(b, comp, 4);
65 }
66
67 static nir_shader *
68 build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
69 {
70 nir_builder b;
71 char name[64];
72 const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
73 false,
74 false,
75 GLSL_TYPE_FLOAT);
76 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D,
77 false,
78 GLSL_TYPE_FLOAT);
79 snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : (is_srgb ? "srgb" : "float"));
80 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
81 b.shader->info.name = ralloc_strdup(b.shader, name);
82 b.shader->info.cs.local_size[0] = 16;
83 b.shader->info.cs.local_size[1] = 16;
84 b.shader->info.cs.local_size[2] = 1;
85
86 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
87 sampler_type, "s_tex");
88 input_img->data.descriptor_set = 0;
89 input_img->data.binding = 0;
90
91 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
92 img_type, "out_img");
93 output_img->data.descriptor_set = 0;
94 output_img->data.binding = 1;
95 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
96 nir_ssa_def *wg_id = nir_load_work_group_id(&b);
97 nir_ssa_def *block_size = nir_imm_ivec4(&b,
98 b.shader->info.cs.local_size[0],
99 b.shader->info.cs.local_size[1],
100 b.shader->info.cs.local_size[2], 0);
101
102 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
103
104 nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
105 nir_intrinsic_set_base(src_offset, 0);
106 nir_intrinsic_set_range(src_offset, 16);
107 src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
108 src_offset->num_components = 2;
109 nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
110 nir_builder_instr_insert(&b, &src_offset->instr);
111
112 nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
113 nir_intrinsic_set_base(dst_offset, 0);
114 nir_intrinsic_set_range(dst_offset, 16);
115 dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
116 dst_offset->num_components = 2;
117 nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
118 nir_builder_instr_insert(&b, &dst_offset->instr);
119
120 nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
121 nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
122
123 radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
124 color, img_coord);
125
126 nir_ssa_def *outval = nir_load_var(&b, color);
127 if (is_srgb)
128 outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
129
130 nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
131 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
132 store->num_components = 4;
133 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
134 store->src[1] = nir_src_for_ssa(coord);
135 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
136 store->src[3] = nir_src_for_ssa(outval);
137 store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
138 nir_builder_instr_insert(&b, &store->instr);
139 return b.shader;
140 }
141
/* Selects which aspect a depth/stencil resolve shader or pipeline handles. */
enum {
	DEPTH_RESOLVE = 0,
	STENCIL_RESOLVE = 1,
};
146
147 static const char *
148 get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
149 {
150 switch (resolve_mode) {
151 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
152 return "zero";
153 case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
154 return "average";
155 case VK_RESOLVE_MODE_MIN_BIT_KHR:
156 return "min";
157 case VK_RESOLVE_MODE_MAX_BIT_KHR:
158 return "max";
159 default:
160 unreachable("invalid resolve mode");
161 }
162 }
163
164 static nir_shader *
165 build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
166 int index,
167 VkResolveModeFlagBits resolve_mode)
168 {
169 nir_builder b;
170 char name[64];
171 const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
172 false,
173 false,
174 GLSL_TYPE_FLOAT);
175 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D,
176 false,
177 GLSL_TYPE_FLOAT);
178 snprintf(name, 64, "meta_resolve_cs_%s-%s-%d",
179 index == DEPTH_RESOLVE ? "depth" : "stencil",
180 get_resolve_mode_str(resolve_mode), samples);
181
182 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
183 b.shader->info.name = ralloc_strdup(b.shader, name);
184 b.shader->info.cs.local_size[0] = 16;
185 b.shader->info.cs.local_size[1] = 16;
186 b.shader->info.cs.local_size[2] = 1;
187
188 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
189 sampler_type, "s_tex");
190 input_img->data.descriptor_set = 0;
191 input_img->data.binding = 0;
192
193 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
194 img_type, "out_img");
195 output_img->data.descriptor_set = 0;
196 output_img->data.binding = 1;
197 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
198 nir_ssa_def *wg_id = nir_load_work_group_id(&b);
199 nir_ssa_def *block_size = nir_imm_ivec4(&b,
200 b.shader->info.cs.local_size[0],
201 b.shader->info.cs.local_size[1],
202 b.shader->info.cs.local_size[2], 0);
203
204 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
205
206 nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
207 nir_intrinsic_set_base(src_offset, 0);
208 nir_intrinsic_set_range(src_offset, 16);
209 src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
210 src_offset->num_components = 2;
211 nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
212 nir_builder_instr_insert(&b, &src_offset->instr);
213
214 nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
215 nir_intrinsic_set_base(dst_offset, 0);
216 nir_intrinsic_set_range(dst_offset, 16);
217 dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
218 dst_offset->num_components = 2;
219 nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
220 nir_builder_instr_insert(&b, &dst_offset->instr);
221
222 nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
223
224 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
225
226 nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float : nir_type_uint;
227
228 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
229 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
230 tex->op = nir_texop_txf_ms;
231 tex->src[0].src_type = nir_tex_src_coord;
232 tex->src[0].src = nir_src_for_ssa(img_coord);
233 tex->src[1].src_type = nir_tex_src_ms_index;
234 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
235 tex->src[2].src_type = nir_tex_src_texture_deref;
236 tex->src[2].src = nir_src_for_ssa(input_img_deref);
237 tex->dest_type = type;
238 tex->is_array = false;
239 tex->coord_components = 2;
240
241 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
242 nir_builder_instr_insert(&b, &tex->instr);
243
244 nir_ssa_def *outval = &tex->dest.ssa;
245
246 if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
247 for (int i = 1; i < samples; i++) {
248 nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
249 tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
250 tex_add->op = nir_texop_txf_ms;
251 tex_add->src[0].src_type = nir_tex_src_coord;
252 tex_add->src[0].src = nir_src_for_ssa(img_coord);
253 tex_add->src[1].src_type = nir_tex_src_ms_index;
254 tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
255 tex_add->src[2].src_type = nir_tex_src_texture_deref;
256 tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
257 tex_add->dest_type = type;
258 tex_add->is_array = false;
259 tex_add->coord_components = 2;
260
261 nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
262 nir_builder_instr_insert(&b, &tex_add->instr);
263
264 switch (resolve_mode) {
265 case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
266 assert(index == DEPTH_RESOLVE);
267 outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
268 break;
269 case VK_RESOLVE_MODE_MIN_BIT_KHR:
270 if (index == DEPTH_RESOLVE)
271 outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
272 else
273 outval = nir_umin(&b, outval, &tex_add->dest.ssa);
274 break;
275 case VK_RESOLVE_MODE_MAX_BIT_KHR:
276 if (index == DEPTH_RESOLVE)
277 outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
278 else
279 outval = nir_umax(&b, outval, &tex_add->dest.ssa);
280 break;
281 default:
282 unreachable("invalid resolve mode");
283 }
284 }
285
286 if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
287 outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
288 }
289
290 nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
291 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
292 store->num_components = 4;
293 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
294 store->src[1] = nir_src_for_ssa(coord);
295 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
296 store->src[3] = nir_src_for_ssa(outval);
297 store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
298 nir_builder_instr_insert(&b, &store->instr);
299 return b.shader;
300 }
301
302 static VkResult
303 create_layout(struct radv_device *device)
304 {
305 VkResult result;
306 /*
307 * two descriptors one for the image being sampled
308 * one for the buffer being written.
309 */
310 VkDescriptorSetLayoutCreateInfo ds_create_info = {
311 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
312 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
313 .bindingCount = 2,
314 .pBindings = (VkDescriptorSetLayoutBinding[]) {
315 {
316 .binding = 0,
317 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
318 .descriptorCount = 1,
319 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
320 .pImmutableSamplers = NULL
321 },
322 {
323 .binding = 1,
324 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
325 .descriptorCount = 1,
326 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
327 .pImmutableSamplers = NULL
328 },
329 }
330 };
331
332 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
333 &ds_create_info,
334 &device->meta_state.alloc,
335 &device->meta_state.resolve_compute.ds_layout);
336 if (result != VK_SUCCESS)
337 goto fail;
338
339
340 VkPipelineLayoutCreateInfo pl_create_info = {
341 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
342 .setLayoutCount = 1,
343 .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
344 .pushConstantRangeCount = 1,
345 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
346 };
347
348 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
349 &pl_create_info,
350 &device->meta_state.alloc,
351 &device->meta_state.resolve_compute.p_layout);
352 if (result != VK_SUCCESS)
353 goto fail;
354 return VK_SUCCESS;
355 fail:
356 return result;
357 }
358
359 static VkResult
360 create_resolve_pipeline(struct radv_device *device,
361 int samples,
362 bool is_integer,
363 bool is_srgb,
364 VkPipeline *pipeline)
365 {
366 VkResult result;
367 struct radv_shader_module cs = { .nir = NULL };
368
369 mtx_lock(&device->meta_state.mtx);
370 if (*pipeline) {
371 mtx_unlock(&device->meta_state.mtx);
372 return VK_SUCCESS;
373 }
374
375 cs.nir = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
376
377 /* compute shader */
378
379 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
380 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
381 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
382 .module = radv_shader_module_to_handle(&cs),
383 .pName = "main",
384 .pSpecializationInfo = NULL,
385 };
386
387 VkComputePipelineCreateInfo vk_pipeline_info = {
388 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
389 .stage = pipeline_shader_stage,
390 .flags = 0,
391 .layout = device->meta_state.resolve_compute.p_layout,
392 };
393
394 result = radv_CreateComputePipelines(radv_device_to_handle(device),
395 radv_pipeline_cache_to_handle(&device->meta_state.cache),
396 1, &vk_pipeline_info, NULL,
397 pipeline);
398 if (result != VK_SUCCESS)
399 goto fail;
400
401 ralloc_free(cs.nir);
402 mtx_unlock(&device->meta_state.mtx);
403 return VK_SUCCESS;
404 fail:
405 ralloc_free(cs.nir);
406 mtx_unlock(&device->meta_state.mtx);
407 return result;
408 }
409
410 static VkResult
411 create_depth_stencil_resolve_pipeline(struct radv_device *device,
412 int samples,
413 int index,
414 VkResolveModeFlagBits resolve_mode,
415 VkPipeline *pipeline)
416 {
417 VkResult result;
418 struct radv_shader_module cs = { .nir = NULL };
419
420 mtx_lock(&device->meta_state.mtx);
421 if (*pipeline) {
422 mtx_unlock(&device->meta_state.mtx);
423 return VK_SUCCESS;
424 }
425
426 cs.nir = build_depth_stencil_resolve_compute_shader(device, samples,
427 index, resolve_mode);
428
429 /* compute shader */
430 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
431 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
432 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
433 .module = radv_shader_module_to_handle(&cs),
434 .pName = "main",
435 .pSpecializationInfo = NULL,
436 };
437
438 VkComputePipelineCreateInfo vk_pipeline_info = {
439 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
440 .stage = pipeline_shader_stage,
441 .flags = 0,
442 .layout = device->meta_state.resolve_compute.p_layout,
443 };
444
445 result = radv_CreateComputePipelines(radv_device_to_handle(device),
446 radv_pipeline_cache_to_handle(&device->meta_state.cache),
447 1, &vk_pipeline_info, NULL,
448 pipeline);
449 if (result != VK_SUCCESS)
450 goto fail;
451
452 ralloc_free(cs.nir);
453 mtx_unlock(&device->meta_state.mtx);
454 return VK_SUCCESS;
455 fail:
456 ralloc_free(cs.nir);
457 mtx_unlock(&device->meta_state.mtx);
458 return result;
459 }
460
461 VkResult
462 radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
463 {
464 struct radv_meta_state *state = &device->meta_state;
465 VkResult res;
466
467 res = create_layout(device);
468 if (res != VK_SUCCESS)
469 goto fail;
470
471 if (on_demand)
472 return VK_SUCCESS;
473
474 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
475 uint32_t samples = 1 << i;
476
477 res = create_resolve_pipeline(device, samples, false, false,
478 &state->resolve_compute.rc[i].pipeline);
479 if (res != VK_SUCCESS)
480 goto fail;
481
482 res = create_resolve_pipeline(device, samples, true, false,
483 &state->resolve_compute.rc[i].i_pipeline);
484 if (res != VK_SUCCESS)
485 goto fail;
486
487 res = create_resolve_pipeline(device, samples, false, true,
488 &state->resolve_compute.rc[i].srgb_pipeline);
489 if (res != VK_SUCCESS)
490 goto fail;
491
492 res = create_depth_stencil_resolve_pipeline(device, samples,
493 DEPTH_RESOLVE,
494 VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
495 &state->resolve_compute.depth[i].average_pipeline);
496 if (res != VK_SUCCESS)
497 goto fail;
498
499 res = create_depth_stencil_resolve_pipeline(device, samples,
500 DEPTH_RESOLVE,
501 VK_RESOLVE_MODE_MAX_BIT_KHR,
502 &state->resolve_compute.depth[i].max_pipeline);
503 if (res != VK_SUCCESS)
504 goto fail;
505
506 res = create_depth_stencil_resolve_pipeline(device, samples,
507 DEPTH_RESOLVE,
508 VK_RESOLVE_MODE_MIN_BIT_KHR,
509 &state->resolve_compute.depth[i].min_pipeline);
510 if (res != VK_SUCCESS)
511 goto fail;
512
513 res = create_depth_stencil_resolve_pipeline(device, samples,
514 STENCIL_RESOLVE,
515 VK_RESOLVE_MODE_MAX_BIT_KHR,
516 &state->resolve_compute.stencil[i].max_pipeline);
517 if (res != VK_SUCCESS)
518 goto fail;
519
520 res = create_depth_stencil_resolve_pipeline(device, samples,
521 STENCIL_RESOLVE,
522 VK_RESOLVE_MODE_MIN_BIT_KHR,
523 &state->resolve_compute.stencil[i].min_pipeline);
524 if (res != VK_SUCCESS)
525 goto fail;
526 }
527
528 res = create_depth_stencil_resolve_pipeline(device, 0,
529 DEPTH_RESOLVE,
530 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
531 &state->resolve_compute.depth_zero_pipeline);
532 if (res != VK_SUCCESS)
533 goto fail;
534
535 res = create_depth_stencil_resolve_pipeline(device, 0,
536 STENCIL_RESOLVE,
537 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
538 &state->resolve_compute.stencil_zero_pipeline);
539 if (res != VK_SUCCESS)
540 goto fail;
541
542 return VK_SUCCESS;
543 fail:
544 radv_device_finish_meta_resolve_compute_state(device);
545 return res;
546 }
547
548 void
549 radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
550 {
551 struct radv_meta_state *state = &device->meta_state;
552 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
553 radv_DestroyPipeline(radv_device_to_handle(device),
554 state->resolve_compute.rc[i].pipeline,
555 &state->alloc);
556
557 radv_DestroyPipeline(radv_device_to_handle(device),
558 state->resolve_compute.rc[i].i_pipeline,
559 &state->alloc);
560
561 radv_DestroyPipeline(radv_device_to_handle(device),
562 state->resolve_compute.rc[i].srgb_pipeline,
563 &state->alloc);
564
565 radv_DestroyPipeline(radv_device_to_handle(device),
566 state->resolve_compute.depth[i].average_pipeline,
567 &state->alloc);
568
569 radv_DestroyPipeline(radv_device_to_handle(device),
570 state->resolve_compute.depth[i].max_pipeline,
571 &state->alloc);
572
573 radv_DestroyPipeline(radv_device_to_handle(device),
574 state->resolve_compute.depth[i].min_pipeline,
575 &state->alloc);
576
577 radv_DestroyPipeline(radv_device_to_handle(device),
578 state->resolve_compute.stencil[i].max_pipeline,
579 &state->alloc);
580
581 radv_DestroyPipeline(radv_device_to_handle(device),
582 state->resolve_compute.stencil[i].min_pipeline,
583 &state->alloc);
584 }
585
586 radv_DestroyPipeline(radv_device_to_handle(device),
587 state->resolve_compute.depth_zero_pipeline,
588 &state->alloc);
589
590 radv_DestroyPipeline(radv_device_to_handle(device),
591 state->resolve_compute.stencil_zero_pipeline,
592 &state->alloc);
593
594 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
595 state->resolve_compute.ds_layout,
596 &state->alloc);
597 radv_DestroyPipelineLayout(radv_device_to_handle(device),
598 state->resolve_compute.p_layout,
599 &state->alloc);
600 }
601
602 static VkPipeline *
603 radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer,
604 struct radv_image_view *src_iview)
605 {
606 struct radv_device *device = cmd_buffer->device;
607 struct radv_meta_state *state = &device->meta_state;
608 uint32_t samples = src_iview->image->info.samples;
609 uint32_t samples_log2 = ffs(samples) - 1;
610 VkPipeline *pipeline;
611
612 if (vk_format_is_int(src_iview->vk_format))
613 pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
614 else if (vk_format_is_srgb(src_iview->vk_format))
615 pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
616 else
617 pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
618
619 if (!*pipeline) {
620 VkResult ret;
621
622 ret = create_resolve_pipeline(device, samples,
623 vk_format_is_int(src_iview->vk_format),
624 vk_format_is_srgb(src_iview->vk_format),
625 pipeline);
626 if (ret != VK_SUCCESS) {
627 cmd_buffer->record_result = ret;
628 return NULL;
629 }
630 }
631
632 return pipeline;
633 }
634
635 static void
636 emit_resolve(struct radv_cmd_buffer *cmd_buffer,
637 struct radv_image_view *src_iview,
638 struct radv_image_view *dest_iview,
639 const VkOffset2D *src_offset,
640 const VkOffset2D *dest_offset,
641 const VkExtent2D *resolve_extent)
642 {
643 struct radv_device *device = cmd_buffer->device;
644 VkPipeline *pipeline;
645
646 radv_meta_push_descriptor_set(cmd_buffer,
647 VK_PIPELINE_BIND_POINT_COMPUTE,
648 device->meta_state.resolve_compute.p_layout,
649 0, /* set */
650 2, /* descriptorWriteCount */
651 (VkWriteDescriptorSet[]) {
652 {
653 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
654 .dstBinding = 0,
655 .dstArrayElement = 0,
656 .descriptorCount = 1,
657 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
658 .pImageInfo = (VkDescriptorImageInfo[]) {
659 {
660 .sampler = VK_NULL_HANDLE,
661 .imageView = radv_image_view_to_handle(src_iview),
662 .imageLayout = VK_IMAGE_LAYOUT_GENERAL },
663 }
664 },
665 {
666 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
667 .dstBinding = 1,
668 .dstArrayElement = 0,
669 .descriptorCount = 1,
670 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
671 .pImageInfo = (VkDescriptorImageInfo[]) {
672 {
673 .sampler = VK_NULL_HANDLE,
674 .imageView = radv_image_view_to_handle(dest_iview),
675 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
676 },
677 }
678 }
679 });
680
681 pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
682
683 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
684 VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
685
686 unsigned push_constants[4] = {
687 src_offset->x,
688 src_offset->y,
689 dest_offset->x,
690 dest_offset->y,
691 };
692 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
693 device->meta_state.resolve_compute.p_layout,
694 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
695 push_constants);
696 radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
697
698 }
699
700 static void
701 emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
702 struct radv_image_view *src_iview,
703 struct radv_image_view *dest_iview,
704 const VkOffset2D *src_offset,
705 const VkOffset2D *dest_offset,
706 const VkExtent2D *resolve_extent,
707 VkImageAspectFlags aspects,
708 VkResolveModeFlagBits resolve_mode)
709 {
710 struct radv_device *device = cmd_buffer->device;
711 const uint32_t samples = src_iview->image->info.samples;
712 const uint32_t samples_log2 = ffs(samples) - 1;
713 VkPipeline *pipeline;
714
715 radv_meta_push_descriptor_set(cmd_buffer,
716 VK_PIPELINE_BIND_POINT_COMPUTE,
717 device->meta_state.resolve_compute.p_layout,
718 0, /* set */
719 2, /* descriptorWriteCount */
720 (VkWriteDescriptorSet[]) {
721 {
722 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
723 .dstBinding = 0,
724 .dstArrayElement = 0,
725 .descriptorCount = 1,
726 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
727 .pImageInfo = (VkDescriptorImageInfo[]) {
728 {
729 .sampler = VK_NULL_HANDLE,
730 .imageView = radv_image_view_to_handle(src_iview),
731 .imageLayout = VK_IMAGE_LAYOUT_GENERAL },
732 }
733 },
734 {
735 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
736 .dstBinding = 1,
737 .dstArrayElement = 0,
738 .descriptorCount = 1,
739 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
740 .pImageInfo = (VkDescriptorImageInfo[]) {
741 {
742 .sampler = VK_NULL_HANDLE,
743 .imageView = radv_image_view_to_handle(dest_iview),
744 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
745 },
746 }
747 }
748 });
749
750 switch (resolve_mode) {
751 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
752 if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
753 pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
754 else
755 pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
756 break;
757 case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
758 assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
759 pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
760 break;
761 case VK_RESOLVE_MODE_MIN_BIT_KHR:
762 if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
763 pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
764 else
765 pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
766 break;
767 case VK_RESOLVE_MODE_MAX_BIT_KHR:
768 if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
769 pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
770 else
771 pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
772 break;
773 default:
774 unreachable("invalid resolve mode");
775 }
776
777 if (!*pipeline) {
778 int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
779 VkResult ret;
780
781 ret = create_depth_stencil_resolve_pipeline(device, samples,
782 index, resolve_mode,
783 pipeline);
784 if (ret != VK_SUCCESS) {
785 cmd_buffer->record_result = ret;
786 return;
787 }
788 }
789
790 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
791 VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
792
793 unsigned push_constants[4] = {
794 src_offset->x,
795 src_offset->y,
796 dest_offset->x,
797 dest_offset->y,
798 };
799 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
800 device->meta_state.resolve_compute.p_layout,
801 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
802 push_constants);
803 radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
804
805 }
806
807 void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
808 struct radv_image *src_image,
809 VkFormat src_format,
810 VkImageLayout src_image_layout,
811 struct radv_image *dest_image,
812 VkFormat dest_format,
813 VkImageLayout dest_image_layout,
814 uint32_t region_count,
815 const VkImageResolve *regions)
816 {
817 struct radv_meta_saved_state saved_state;
818
819 radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout,
820 region_count, regions);
821
822 radv_meta_save(&saved_state, cmd_buffer,
823 RADV_META_SAVE_COMPUTE_PIPELINE |
824 RADV_META_SAVE_CONSTANTS |
825 RADV_META_SAVE_DESCRIPTORS);
826
827 for (uint32_t r = 0; r < region_count; ++r) {
828 const VkImageResolve *region = &regions[r];
829
830 assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
831 assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
832 assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
833
834 const uint32_t src_base_layer =
835 radv_meta_get_iview_layer(src_image, &region->srcSubresource,
836 &region->srcOffset);
837
838 const uint32_t dest_base_layer =
839 radv_meta_get_iview_layer(dest_image, &region->dstSubresource,
840 &region->dstOffset);
841
842 const struct VkExtent3D extent =
843 radv_sanitize_image_extent(src_image->type, region->extent);
844 const struct VkOffset3D srcOffset =
845 radv_sanitize_image_offset(src_image->type, region->srcOffset);
846 const struct VkOffset3D dstOffset =
847 radv_sanitize_image_offset(dest_image->type, region->dstOffset);
848
849 for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
850 ++layer) {
851
852 struct radv_image_view src_iview;
853 radv_image_view_init(&src_iview, cmd_buffer->device,
854 &(VkImageViewCreateInfo) {
855 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
856 .image = radv_image_to_handle(src_image),
857 .viewType = radv_meta_get_view_type(src_image),
858 .format = src_format,
859 .subresourceRange = {
860 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
861 .baseMipLevel = region->srcSubresource.mipLevel,
862 .levelCount = 1,
863 .baseArrayLayer = src_base_layer + layer,
864 .layerCount = 1,
865 },
866 }, NULL);
867
868 struct radv_image_view dest_iview;
869 radv_image_view_init(&dest_iview, cmd_buffer->device,
870 &(VkImageViewCreateInfo) {
871 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
872 .image = radv_image_to_handle(dest_image),
873 .viewType = radv_meta_get_view_type(dest_image),
874 .format = vk_to_non_srgb_format(dest_format),
875 .subresourceRange = {
876 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
877 .baseMipLevel = region->dstSubresource.mipLevel,
878 .levelCount = 1,
879 .baseArrayLayer = dest_base_layer + layer,
880 .layerCount = 1,
881 },
882 }, NULL);
883
884 emit_resolve(cmd_buffer,
885 &src_iview,
886 &dest_iview,
887 &(VkOffset2D) {srcOffset.x, srcOffset.y },
888 &(VkOffset2D) {dstOffset.x, dstOffset.y },
889 &(VkExtent2D) {extent.width, extent.height });
890 }
891 }
892 radv_meta_restore(&saved_state, cmd_buffer);
893 }
894
895 /**
896 * Emit any needed resolves for the current subpass.
897 */
898 void
899 radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
900 {
901 struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
902 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
903 struct radv_subpass_barrier barrier;
904 uint32_t layer_count = fb->layers;
905
906 if (subpass->view_mask)
907 layer_count = util_last_bit(subpass->view_mask);
908
909 /* Resolves happen before the end-of-subpass barriers get executed, so
910 * we have to make the attachment shader-readable.
911 */
912 barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
913 barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
914 barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
915 radv_subpass_barrier(cmd_buffer, &barrier);
916
917 for (uint32_t i = 0; i < subpass->color_count; ++i) {
918 struct radv_subpass_attachment src_att = subpass->color_attachments[i];
919 struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];
920
921 if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
922 continue;
923
924 struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
925 struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
926
927 VkImageResolve region = {
928 .extent = (VkExtent3D){ fb->width, fb->height, 0 },
929 .srcSubresource = (VkImageSubresourceLayers) {
930 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
931 .mipLevel = src_iview->base_mip,
932 .baseArrayLayer = src_iview->base_layer,
933 .layerCount = layer_count,
934 },
935 .dstSubresource = (VkImageSubresourceLayers) {
936 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
937 .mipLevel = dst_iview->base_mip,
938 .baseArrayLayer = dst_iview->base_layer,
939 .layerCount = layer_count,
940 },
941 .srcOffset = (VkOffset3D){ 0, 0, 0 },
942 .dstOffset = (VkOffset3D){ 0, 0, 0 },
943 };
944
945 radv_meta_resolve_compute_image(cmd_buffer,
946 src_iview->image,
947 src_iview->vk_format,
948 src_att.layout,
949 dst_iview->image,
950 dst_iview->vk_format,
951 dst_att.layout,
952 1, &region);
953 }
954
955 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
956 RADV_CMD_FLAG_INV_VCACHE;
957 }
958
959 void
960 radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
961 VkImageAspectFlags aspects,
962 VkResolveModeFlagBits resolve_mode)
963 {
964 struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
965 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
966 struct radv_meta_saved_state saved_state;
967 struct radv_subpass_barrier barrier;
968 uint32_t layer_count = fb->layers;
969
970 if (subpass->view_mask)
971 layer_count = util_last_bit(subpass->view_mask);
972
973 /* Resolves happen before the end-of-subpass barriers get executed, so
974 * we have to make the attachment shader-readable.
975 */
976 barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
977 barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
978 barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
979 radv_subpass_barrier(cmd_buffer, &barrier);
980
981 radv_decompress_resolve_subpass_src(cmd_buffer);
982
983 radv_meta_save(&saved_state, cmd_buffer,
984 RADV_META_SAVE_COMPUTE_PIPELINE |
985 RADV_META_SAVE_CONSTANTS |
986 RADV_META_SAVE_DESCRIPTORS);
987
988 struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
989 struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
990
991 struct radv_image_view *src_iview =
992 cmd_buffer->state.attachments[src_att.attachment].iview;
993 struct radv_image_view *dst_iview =
994 cmd_buffer->state.attachments[dest_att.attachment].iview;
995
996 struct radv_image *src_image = src_iview->image;
997 struct radv_image *dst_image = dst_iview->image;
998
999 for (uint32_t layer = 0; layer < layer_count; layer++) {
1000 struct radv_image_view tsrc_iview;
1001 radv_image_view_init(&tsrc_iview, cmd_buffer->device,
1002 &(VkImageViewCreateInfo) {
1003 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1004 .image = radv_image_to_handle(src_image),
1005 .viewType = radv_meta_get_view_type(src_image),
1006 .format = src_iview->vk_format,
1007 .subresourceRange = {
1008 .aspectMask = aspects,
1009 .baseMipLevel = src_iview->base_mip,
1010 .levelCount = 1,
1011 .baseArrayLayer = src_iview->base_layer + layer,
1012 .layerCount = 1,
1013 },
1014 }, NULL);
1015
1016 struct radv_image_view tdst_iview;
1017 radv_image_view_init(&tdst_iview, cmd_buffer->device,
1018 &(VkImageViewCreateInfo) {
1019 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1020 .image = radv_image_to_handle(dst_image),
1021 .viewType = radv_meta_get_view_type(dst_image),
1022 .format = dst_iview->vk_format,
1023 .subresourceRange = {
1024 .aspectMask = aspects,
1025 .baseMipLevel = dst_iview->base_mip,
1026 .levelCount = 1,
1027 .baseArrayLayer = dst_iview->base_layer + layer,
1028 .layerCount = 1,
1029 },
1030 }, NULL);
1031
1032 emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
1033 &(VkOffset2D) { 0, 0 },
1034 &(VkOffset2D) { 0, 0 },
1035 &(VkExtent2D) { fb->width, fb->height },
1036 aspects,
1037 resolve_mode);
1038 }
1039
1040 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
1041 RADV_CMD_FLAG_INV_VCACHE;
1042
1043 if (radv_image_has_htile(dst_image)) {
1044 if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
1045 VkImageSubresourceRange range = {};
1046 range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
1047 range.baseMipLevel = dst_iview->base_mip;
1048 range.levelCount = 1;
1049 range.baseArrayLayer = dst_iview->base_layer;
1050 range.layerCount = layer_count;
1051
1052 uint32_t clear_value = 0xfffc000f;
1053
1054 if (vk_format_is_stencil(dst_image->vk_format) &&
1055 subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
1056 /* Only clear the stencil part of the HTILE
1057 * buffer if it's resolved, otherwise this
1058 * might break if the stencil has been cleared.
1059 */
1060 clear_value = 0xfffff30f;
1061 }
1062
1063 cmd_buffer->state.flush_bits |=
1064 radv_clear_htile(cmd_buffer, dst_image, &range,
1065 clear_value);
1066 }
1067 }
1068
1069 radv_meta_restore(&saved_state, cmd_buffer);
1070 }