/*
 * Copyright © 2016 Dave Airlie
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28 #include "radv_meta.h"
29 #include "radv_private.h"
30 #include "nir/nir_builder.h"
32 #include "vk_format.h"
34 static nir_ssa_def
*radv_meta_build_resolve_srgb_conversion(nir_builder
*b
,
40 for (i
= 0; i
< 3; i
++)
41 cmp
[i
] = nir_flt(b
, nir_channel(b
, input
, i
),
42 nir_imm_int(b
, 0x3b4d2e1c));
44 nir_ssa_def
*ltvals
[3];
45 for (i
= 0; i
< 3; i
++)
46 ltvals
[i
] = nir_fmul(b
, nir_channel(b
, input
, i
),
47 nir_imm_float(b
, 12.92));
49 nir_ssa_def
*gtvals
[3];
51 for (i
= 0; i
< 3; i
++) {
52 gtvals
[i
] = nir_fpow(b
, nir_channel(b
, input
, i
),
53 nir_imm_float(b
, 1.0/2.4));
54 gtvals
[i
] = nir_fmul(b
, gtvals
[i
],
55 nir_imm_float(b
, 1.055));
56 gtvals
[i
] = nir_fsub(b
, gtvals
[i
],
57 nir_imm_float(b
, 0.055));
61 for (i
= 0; i
< 3; i
++)
62 comp
[i
] = nir_bcsel(b
, cmp
[i
], ltvals
[i
], gtvals
[i
]);
63 comp
[3] = nir_channels(b
, input
, 1 << 3);
64 return nir_vec(b
, comp
, 4);
68 build_resolve_compute_shader(struct radv_device
*dev
, bool is_integer
, bool is_srgb
, int samples
)
72 const struct glsl_type
*sampler_type
= glsl_sampler_type(GLSL_SAMPLER_DIM_MS
,
76 const struct glsl_type
*img_type
= glsl_sampler_type(GLSL_SAMPLER_DIM_2D
,
80 snprintf(name
, 64, "meta_resolve_cs-%d-%s", samples
, is_integer
? "int" : (is_srgb
? "srgb" : "float"));
81 nir_builder_init_simple_shader(&b
, NULL
, MESA_SHADER_COMPUTE
, NULL
);
82 b
.shader
->info
.name
= ralloc_strdup(b
.shader
, name
);
83 b
.shader
->info
.cs
.local_size
[0] = 16;
84 b
.shader
->info
.cs
.local_size
[1] = 16;
85 b
.shader
->info
.cs
.local_size
[2] = 1;
87 nir_variable
*input_img
= nir_variable_create(b
.shader
, nir_var_uniform
,
88 sampler_type
, "s_tex");
89 input_img
->data
.descriptor_set
= 0;
90 input_img
->data
.binding
= 0;
92 nir_variable
*output_img
= nir_variable_create(b
.shader
, nir_var_uniform
,
94 output_img
->data
.descriptor_set
= 0;
95 output_img
->data
.binding
= 1;
96 nir_ssa_def
*invoc_id
= nir_load_local_invocation_id(&b
);
97 nir_ssa_def
*wg_id
= nir_load_work_group_id(&b
);
98 nir_ssa_def
*block_size
= nir_imm_ivec4(&b
,
99 b
.shader
->info
.cs
.local_size
[0],
100 b
.shader
->info
.cs
.local_size
[1],
101 b
.shader
->info
.cs
.local_size
[2], 0);
103 nir_ssa_def
*global_id
= nir_iadd(&b
, nir_imul(&b
, wg_id
, block_size
), invoc_id
);
105 nir_intrinsic_instr
*src_offset
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
106 nir_intrinsic_set_base(src_offset
, 0);
107 nir_intrinsic_set_range(src_offset
, 16);
108 src_offset
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
109 src_offset
->num_components
= 2;
110 nir_ssa_dest_init(&src_offset
->instr
, &src_offset
->dest
, 2, 32, "src_offset");
111 nir_builder_instr_insert(&b
, &src_offset
->instr
);
113 nir_intrinsic_instr
*dst_offset
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
114 nir_intrinsic_set_base(dst_offset
, 0);
115 nir_intrinsic_set_range(dst_offset
, 16);
116 dst_offset
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 8));
117 dst_offset
->num_components
= 2;
118 nir_ssa_dest_init(&dst_offset
->instr
, &dst_offset
->dest
, 2, 32, "dst_offset");
119 nir_builder_instr_insert(&b
, &dst_offset
->instr
);
121 nir_ssa_def
*img_coord
= nir_channels(&b
, nir_iadd(&b
, global_id
, &src_offset
->dest
.ssa
), 0x3);
122 nir_variable
*color
= nir_local_variable_create(b
.impl
, glsl_vec4_type(), "color");
124 radv_meta_build_resolve_shader_core(&b
, is_integer
, samples
, input_img
,
127 nir_ssa_def
*outval
= nir_load_var(&b
, color
);
129 outval
= radv_meta_build_resolve_srgb_conversion(&b
, outval
);
131 nir_ssa_def
*coord
= nir_iadd(&b
, global_id
, &dst_offset
->dest
.ssa
);
132 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_image_deref_store
);
133 store
->num_components
= 4;
134 store
->src
[0] = nir_src_for_ssa(&nir_build_deref_var(&b
, output_img
)->dest
.ssa
);
135 store
->src
[1] = nir_src_for_ssa(coord
);
136 store
->src
[2] = nir_src_for_ssa(nir_ssa_undef(&b
, 1, 32));
137 store
->src
[3] = nir_src_for_ssa(outval
);
138 store
->src
[4] = nir_src_for_ssa(nir_imm_int(&b
, 0));
139 nir_builder_instr_insert(&b
, &store
->instr
);
149 get_resolve_mode_str(VkResolveModeFlagBits resolve_mode
)
151 switch (resolve_mode
) {
152 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR
:
154 case VK_RESOLVE_MODE_AVERAGE_BIT_KHR
:
156 case VK_RESOLVE_MODE_MIN_BIT_KHR
:
158 case VK_RESOLVE_MODE_MAX_BIT_KHR
:
161 unreachable("invalid resolve mode");
166 build_depth_stencil_resolve_compute_shader(struct radv_device
*dev
, int samples
,
168 VkResolveModeFlagBits resolve_mode
)
172 const struct glsl_type
*sampler_type
= glsl_sampler_type(GLSL_SAMPLER_DIM_MS
,
176 const struct glsl_type
*img_type
= glsl_sampler_type(GLSL_SAMPLER_DIM_2D
,
180 snprintf(name
, 64, "meta_resolve_cs_%s-%s-%d",
181 index
== DEPTH_RESOLVE
? "depth" : "stencil",
182 get_resolve_mode_str(resolve_mode
), samples
);
184 nir_builder_init_simple_shader(&b
, NULL
, MESA_SHADER_COMPUTE
, NULL
);
185 b
.shader
->info
.name
= ralloc_strdup(b
.shader
, name
);
186 b
.shader
->info
.cs
.local_size
[0] = 16;
187 b
.shader
->info
.cs
.local_size
[1] = 16;
188 b
.shader
->info
.cs
.local_size
[2] = 1;
190 nir_variable
*input_img
= nir_variable_create(b
.shader
, nir_var_uniform
,
191 sampler_type
, "s_tex");
192 input_img
->data
.descriptor_set
= 0;
193 input_img
->data
.binding
= 0;
195 nir_variable
*output_img
= nir_variable_create(b
.shader
, nir_var_uniform
,
196 img_type
, "out_img");
197 output_img
->data
.descriptor_set
= 0;
198 output_img
->data
.binding
= 1;
199 nir_ssa_def
*invoc_id
= nir_load_local_invocation_id(&b
);
200 nir_ssa_def
*wg_id
= nir_load_work_group_id(&b
);
201 nir_ssa_def
*block_size
= nir_imm_ivec4(&b
,
202 b
.shader
->info
.cs
.local_size
[0],
203 b
.shader
->info
.cs
.local_size
[1],
204 b
.shader
->info
.cs
.local_size
[2], 0);
206 nir_ssa_def
*global_id
= nir_iadd(&b
, nir_imul(&b
, wg_id
, block_size
), invoc_id
);
208 nir_intrinsic_instr
*src_offset
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
209 nir_intrinsic_set_base(src_offset
, 0);
210 nir_intrinsic_set_range(src_offset
, 16);
211 src_offset
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
212 src_offset
->num_components
= 2;
213 nir_ssa_dest_init(&src_offset
->instr
, &src_offset
->dest
, 2, 32, "src_offset");
214 nir_builder_instr_insert(&b
, &src_offset
->instr
);
216 nir_intrinsic_instr
*dst_offset
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
217 nir_intrinsic_set_base(dst_offset
, 0);
218 nir_intrinsic_set_range(dst_offset
, 16);
219 dst_offset
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 8));
220 dst_offset
->num_components
= 2;
221 nir_ssa_dest_init(&dst_offset
->instr
, &dst_offset
->dest
, 2, 32, "dst_offset");
222 nir_builder_instr_insert(&b
, &dst_offset
->instr
);
224 nir_ssa_def
*img_coord
= nir_channels(&b
, nir_iadd(&b
, global_id
, &src_offset
->dest
.ssa
), 0x3);
226 nir_ssa_def
*input_img_deref
= &nir_build_deref_var(&b
, input_img
)->dest
.ssa
;
228 nir_alu_type type
= index
== DEPTH_RESOLVE
? nir_type_float
: nir_type_uint
;
230 nir_tex_instr
*tex
= nir_tex_instr_create(b
.shader
, 3);
231 tex
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
232 tex
->op
= nir_texop_txf_ms
;
233 tex
->src
[0].src_type
= nir_tex_src_coord
;
234 tex
->src
[0].src
= nir_src_for_ssa(img_coord
);
235 tex
->src
[1].src_type
= nir_tex_src_ms_index
;
236 tex
->src
[1].src
= nir_src_for_ssa(nir_imm_int(&b
, 0));
237 tex
->src
[2].src_type
= nir_tex_src_texture_deref
;
238 tex
->src
[2].src
= nir_src_for_ssa(input_img_deref
);
239 tex
->dest_type
= type
;
240 tex
->is_array
= false;
241 tex
->coord_components
= 2;
243 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
, 4, 32, "tex");
244 nir_builder_instr_insert(&b
, &tex
->instr
);
246 nir_ssa_def
*outval
= &tex
->dest
.ssa
;
248 if (resolve_mode
!= VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR
) {
249 for (int i
= 1; i
< samples
; i
++) {
250 nir_tex_instr
*tex_add
= nir_tex_instr_create(b
.shader
, 3);
251 tex_add
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
252 tex_add
->op
= nir_texop_txf_ms
;
253 tex_add
->src
[0].src_type
= nir_tex_src_coord
;
254 tex_add
->src
[0].src
= nir_src_for_ssa(img_coord
);
255 tex_add
->src
[1].src_type
= nir_tex_src_ms_index
;
256 tex_add
->src
[1].src
= nir_src_for_ssa(nir_imm_int(&b
, i
));
257 tex_add
->src
[2].src_type
= nir_tex_src_texture_deref
;
258 tex_add
->src
[2].src
= nir_src_for_ssa(input_img_deref
);
259 tex_add
->dest_type
= type
;
260 tex_add
->is_array
= false;
261 tex_add
->coord_components
= 2;
263 nir_ssa_dest_init(&tex_add
->instr
, &tex_add
->dest
, 4, 32, "tex");
264 nir_builder_instr_insert(&b
, &tex_add
->instr
);
266 switch (resolve_mode
) {
267 case VK_RESOLVE_MODE_AVERAGE_BIT_KHR
:
268 assert(index
== DEPTH_RESOLVE
);
269 outval
= nir_fadd(&b
, outval
, &tex_add
->dest
.ssa
);
271 case VK_RESOLVE_MODE_MIN_BIT_KHR
:
272 if (index
== DEPTH_RESOLVE
)
273 outval
= nir_fmin(&b
, outval
, &tex_add
->dest
.ssa
);
275 outval
= nir_umin(&b
, outval
, &tex_add
->dest
.ssa
);
277 case VK_RESOLVE_MODE_MAX_BIT_KHR
:
278 if (index
== DEPTH_RESOLVE
)
279 outval
= nir_fmax(&b
, outval
, &tex_add
->dest
.ssa
);
281 outval
= nir_umax(&b
, outval
, &tex_add
->dest
.ssa
);
284 unreachable("invalid resolve mode");
288 if (resolve_mode
== VK_RESOLVE_MODE_AVERAGE_BIT_KHR
)
289 outval
= nir_fdiv(&b
, outval
, nir_imm_float(&b
, samples
));
292 nir_ssa_def
*coord
= nir_iadd(&b
, global_id
, &dst_offset
->dest
.ssa
);
293 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_image_deref_store
);
294 store
->num_components
= 4;
295 store
->src
[0] = nir_src_for_ssa(&nir_build_deref_var(&b
, output_img
)->dest
.ssa
);
296 store
->src
[1] = nir_src_for_ssa(coord
);
297 store
->src
[2] = nir_src_for_ssa(nir_ssa_undef(&b
, 1, 32));
298 store
->src
[3] = nir_src_for_ssa(outval
);
299 store
->src
[4] = nir_src_for_ssa(nir_imm_int(&b
, 0));
300 nir_builder_instr_insert(&b
, &store
->instr
);
305 create_layout(struct radv_device
*device
)
309 * two descriptors one for the image being sampled
310 * one for the buffer being written.
312 VkDescriptorSetLayoutCreateInfo ds_create_info
= {
313 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO
,
314 .flags
= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR
,
316 .pBindings
= (VkDescriptorSetLayoutBinding
[]) {
319 .descriptorType
= VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE
,
320 .descriptorCount
= 1,
321 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
322 .pImmutableSamplers
= NULL
326 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
,
327 .descriptorCount
= 1,
328 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
329 .pImmutableSamplers
= NULL
334 result
= radv_CreateDescriptorSetLayout(radv_device_to_handle(device
),
336 &device
->meta_state
.alloc
,
337 &device
->meta_state
.resolve_compute
.ds_layout
);
338 if (result
!= VK_SUCCESS
)
342 VkPipelineLayoutCreateInfo pl_create_info
= {
343 .sType
= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
,
345 .pSetLayouts
= &device
->meta_state
.resolve_compute
.ds_layout
,
346 .pushConstantRangeCount
= 1,
347 .pPushConstantRanges
= &(VkPushConstantRange
){VK_SHADER_STAGE_COMPUTE_BIT
, 0, 16},
350 result
= radv_CreatePipelineLayout(radv_device_to_handle(device
),
352 &device
->meta_state
.alloc
,
353 &device
->meta_state
.resolve_compute
.p_layout
);
354 if (result
!= VK_SUCCESS
)
362 create_resolve_pipeline(struct radv_device
*device
,
366 VkPipeline
*pipeline
)
369 struct radv_shader_module cs
= { .nir
= NULL
};
371 mtx_lock(&device
->meta_state
.mtx
);
373 mtx_unlock(&device
->meta_state
.mtx
);
377 cs
.nir
= build_resolve_compute_shader(device
, is_integer
, is_srgb
, samples
);
381 VkPipelineShaderStageCreateInfo pipeline_shader_stage
= {
382 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
383 .stage
= VK_SHADER_STAGE_COMPUTE_BIT
,
384 .module
= radv_shader_module_to_handle(&cs
),
386 .pSpecializationInfo
= NULL
,
389 VkComputePipelineCreateInfo vk_pipeline_info
= {
390 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
391 .stage
= pipeline_shader_stage
,
393 .layout
= device
->meta_state
.resolve_compute
.p_layout
,
396 result
= radv_CreateComputePipelines(radv_device_to_handle(device
),
397 radv_pipeline_cache_to_handle(&device
->meta_state
.cache
),
398 1, &vk_pipeline_info
, NULL
,
400 if (result
!= VK_SUCCESS
)
404 mtx_unlock(&device
->meta_state
.mtx
);
408 mtx_unlock(&device
->meta_state
.mtx
);
413 create_depth_stencil_resolve_pipeline(struct radv_device
*device
,
416 VkResolveModeFlagBits resolve_mode
,
417 VkPipeline
*pipeline
)
420 struct radv_shader_module cs
= { .nir
= NULL
};
422 mtx_lock(&device
->meta_state
.mtx
);
424 mtx_unlock(&device
->meta_state
.mtx
);
428 cs
.nir
= build_depth_stencil_resolve_compute_shader(device
, samples
,
429 index
, resolve_mode
);
432 VkPipelineShaderStageCreateInfo pipeline_shader_stage
= {
433 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
434 .stage
= VK_SHADER_STAGE_COMPUTE_BIT
,
435 .module
= radv_shader_module_to_handle(&cs
),
437 .pSpecializationInfo
= NULL
,
440 VkComputePipelineCreateInfo vk_pipeline_info
= {
441 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
442 .stage
= pipeline_shader_stage
,
444 .layout
= device
->meta_state
.resolve_compute
.p_layout
,
447 result
= radv_CreateComputePipelines(radv_device_to_handle(device
),
448 radv_pipeline_cache_to_handle(&device
->meta_state
.cache
),
449 1, &vk_pipeline_info
, NULL
,
451 if (result
!= VK_SUCCESS
)
455 mtx_unlock(&device
->meta_state
.mtx
);
459 mtx_unlock(&device
->meta_state
.mtx
);
464 radv_device_init_meta_resolve_compute_state(struct radv_device
*device
, bool on_demand
)
466 struct radv_meta_state
*state
= &device
->meta_state
;
469 res
= create_layout(device
);
470 if (res
!= VK_SUCCESS
)
476 for (uint32_t i
= 0; i
< MAX_SAMPLES_LOG2
; ++i
) {
477 uint32_t samples
= 1 << i
;
479 res
= create_resolve_pipeline(device
, samples
, false, false,
480 &state
->resolve_compute
.rc
[i
].pipeline
);
481 if (res
!= VK_SUCCESS
)
484 res
= create_resolve_pipeline(device
, samples
, true, false,
485 &state
->resolve_compute
.rc
[i
].i_pipeline
);
486 if (res
!= VK_SUCCESS
)
489 res
= create_resolve_pipeline(device
, samples
, false, true,
490 &state
->resolve_compute
.rc
[i
].srgb_pipeline
);
491 if (res
!= VK_SUCCESS
)
494 res
= create_depth_stencil_resolve_pipeline(device
, samples
,
496 VK_RESOLVE_MODE_AVERAGE_BIT_KHR
,
497 &state
->resolve_compute
.depth
[i
].average_pipeline
);
498 if (res
!= VK_SUCCESS
)
501 res
= create_depth_stencil_resolve_pipeline(device
, samples
,
503 VK_RESOLVE_MODE_MAX_BIT_KHR
,
504 &state
->resolve_compute
.depth
[i
].max_pipeline
);
505 if (res
!= VK_SUCCESS
)
508 res
= create_depth_stencil_resolve_pipeline(device
, samples
,
510 VK_RESOLVE_MODE_MIN_BIT_KHR
,
511 &state
->resolve_compute
.depth
[i
].min_pipeline
);
512 if (res
!= VK_SUCCESS
)
515 res
= create_depth_stencil_resolve_pipeline(device
, samples
,
517 VK_RESOLVE_MODE_MAX_BIT_KHR
,
518 &state
->resolve_compute
.stencil
[i
].max_pipeline
);
519 if (res
!= VK_SUCCESS
)
522 res
= create_depth_stencil_resolve_pipeline(device
, samples
,
524 VK_RESOLVE_MODE_MIN_BIT_KHR
,
525 &state
->resolve_compute
.stencil
[i
].min_pipeline
);
526 if (res
!= VK_SUCCESS
)
530 res
= create_depth_stencil_resolve_pipeline(device
, 0,
532 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR
,
533 &state
->resolve_compute
.depth_zero_pipeline
);
534 if (res
!= VK_SUCCESS
)
537 res
= create_depth_stencil_resolve_pipeline(device
, 0,
539 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR
,
540 &state
->resolve_compute
.stencil_zero_pipeline
);
541 if (res
!= VK_SUCCESS
)
546 radv_device_finish_meta_resolve_compute_state(device
);
551 radv_device_finish_meta_resolve_compute_state(struct radv_device
*device
)
553 struct radv_meta_state
*state
= &device
->meta_state
;
554 for (uint32_t i
= 0; i
< MAX_SAMPLES_LOG2
; ++i
) {
555 radv_DestroyPipeline(radv_device_to_handle(device
),
556 state
->resolve_compute
.rc
[i
].pipeline
,
559 radv_DestroyPipeline(radv_device_to_handle(device
),
560 state
->resolve_compute
.rc
[i
].i_pipeline
,
563 radv_DestroyPipeline(radv_device_to_handle(device
),
564 state
->resolve_compute
.rc
[i
].srgb_pipeline
,
567 radv_DestroyPipeline(radv_device_to_handle(device
),
568 state
->resolve_compute
.depth
[i
].average_pipeline
,
571 radv_DestroyPipeline(radv_device_to_handle(device
),
572 state
->resolve_compute
.depth
[i
].max_pipeline
,
575 radv_DestroyPipeline(radv_device_to_handle(device
),
576 state
->resolve_compute
.depth
[i
].min_pipeline
,
579 radv_DestroyPipeline(radv_device_to_handle(device
),
580 state
->resolve_compute
.stencil
[i
].max_pipeline
,
583 radv_DestroyPipeline(radv_device_to_handle(device
),
584 state
->resolve_compute
.stencil
[i
].min_pipeline
,
588 radv_DestroyPipeline(radv_device_to_handle(device
),
589 state
->resolve_compute
.depth_zero_pipeline
,
592 radv_DestroyPipeline(radv_device_to_handle(device
),
593 state
->resolve_compute
.stencil_zero_pipeline
,
596 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device
),
597 state
->resolve_compute
.ds_layout
,
599 radv_DestroyPipelineLayout(radv_device_to_handle(device
),
600 state
->resolve_compute
.p_layout
,
605 radv_get_resolve_pipeline(struct radv_cmd_buffer
*cmd_buffer
,
606 struct radv_image_view
*src_iview
)
608 struct radv_device
*device
= cmd_buffer
->device
;
609 struct radv_meta_state
*state
= &device
->meta_state
;
610 uint32_t samples
= src_iview
->image
->info
.samples
;
611 uint32_t samples_log2
= ffs(samples
) - 1;
612 VkPipeline
*pipeline
;
614 if (vk_format_is_int(src_iview
->vk_format
))
615 pipeline
= &state
->resolve_compute
.rc
[samples_log2
].i_pipeline
;
616 else if (vk_format_is_srgb(src_iview
->vk_format
))
617 pipeline
= &state
->resolve_compute
.rc
[samples_log2
].srgb_pipeline
;
619 pipeline
= &state
->resolve_compute
.rc
[samples_log2
].pipeline
;
624 ret
= create_resolve_pipeline(device
, samples
,
625 vk_format_is_int(src_iview
->vk_format
),
626 vk_format_is_srgb(src_iview
->vk_format
),
628 if (ret
!= VK_SUCCESS
) {
629 cmd_buffer
->record_result
= ret
;
638 emit_resolve(struct radv_cmd_buffer
*cmd_buffer
,
639 struct radv_image_view
*src_iview
,
640 struct radv_image_view
*dest_iview
,
641 const VkOffset2D
*src_offset
,
642 const VkOffset2D
*dest_offset
,
643 const VkExtent2D
*resolve_extent
)
645 struct radv_device
*device
= cmd_buffer
->device
;
646 VkPipeline
*pipeline
;
648 radv_meta_push_descriptor_set(cmd_buffer
,
649 VK_PIPELINE_BIND_POINT_COMPUTE
,
650 device
->meta_state
.resolve_compute
.p_layout
,
652 2, /* descriptorWriteCount */
653 (VkWriteDescriptorSet
[]) {
655 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
657 .dstArrayElement
= 0,
658 .descriptorCount
= 1,
659 .descriptorType
= VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE
,
660 .pImageInfo
= (VkDescriptorImageInfo
[]) {
662 .sampler
= VK_NULL_HANDLE
,
663 .imageView
= radv_image_view_to_handle(src_iview
),
664 .imageLayout
= VK_IMAGE_LAYOUT_GENERAL
},
668 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
670 .dstArrayElement
= 0,
671 .descriptorCount
= 1,
672 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
,
673 .pImageInfo
= (VkDescriptorImageInfo
[]) {
675 .sampler
= VK_NULL_HANDLE
,
676 .imageView
= radv_image_view_to_handle(dest_iview
),
677 .imageLayout
= VK_IMAGE_LAYOUT_GENERAL
,
683 pipeline
= radv_get_resolve_pipeline(cmd_buffer
, src_iview
);
685 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer
),
686 VK_PIPELINE_BIND_POINT_COMPUTE
, *pipeline
);
688 unsigned push_constants
[4] = {
694 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer
),
695 device
->meta_state
.resolve_compute
.p_layout
,
696 VK_SHADER_STAGE_COMPUTE_BIT
, 0, 16,
698 radv_unaligned_dispatch(cmd_buffer
, resolve_extent
->width
, resolve_extent
->height
, 1);
703 emit_depth_stencil_resolve(struct radv_cmd_buffer
*cmd_buffer
,
704 struct radv_image_view
*src_iview
,
705 struct radv_image_view
*dest_iview
,
706 const VkOffset2D
*src_offset
,
707 const VkOffset2D
*dest_offset
,
708 const VkExtent2D
*resolve_extent
,
709 VkImageAspectFlags aspects
,
710 VkResolveModeFlagBits resolve_mode
)
712 struct radv_device
*device
= cmd_buffer
->device
;
713 const uint32_t samples
= src_iview
->image
->info
.samples
;
714 const uint32_t samples_log2
= ffs(samples
) - 1;
715 VkPipeline
*pipeline
;
717 radv_meta_push_descriptor_set(cmd_buffer
,
718 VK_PIPELINE_BIND_POINT_COMPUTE
,
719 device
->meta_state
.resolve_compute
.p_layout
,
721 2, /* descriptorWriteCount */
722 (VkWriteDescriptorSet
[]) {
724 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
726 .dstArrayElement
= 0,
727 .descriptorCount
= 1,
728 .descriptorType
= VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE
,
729 .pImageInfo
= (VkDescriptorImageInfo
[]) {
731 .sampler
= VK_NULL_HANDLE
,
732 .imageView
= radv_image_view_to_handle(src_iview
),
733 .imageLayout
= VK_IMAGE_LAYOUT_GENERAL
},
737 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
739 .dstArrayElement
= 0,
740 .descriptorCount
= 1,
741 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
,
742 .pImageInfo
= (VkDescriptorImageInfo
[]) {
744 .sampler
= VK_NULL_HANDLE
,
745 .imageView
= radv_image_view_to_handle(dest_iview
),
746 .imageLayout
= VK_IMAGE_LAYOUT_GENERAL
,
752 switch (resolve_mode
) {
753 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR
:
754 if (aspects
== VK_IMAGE_ASPECT_DEPTH_BIT
)
755 pipeline
= &device
->meta_state
.resolve_compute
.depth_zero_pipeline
;
757 pipeline
= &device
->meta_state
.resolve_compute
.stencil_zero_pipeline
;
759 case VK_RESOLVE_MODE_AVERAGE_BIT_KHR
:
760 assert(aspects
== VK_IMAGE_ASPECT_DEPTH_BIT
);
761 pipeline
= &device
->meta_state
.resolve_compute
.depth
[samples_log2
].average_pipeline
;
763 case VK_RESOLVE_MODE_MIN_BIT_KHR
:
764 if (aspects
== VK_IMAGE_ASPECT_DEPTH_BIT
)
765 pipeline
= &device
->meta_state
.resolve_compute
.depth
[samples_log2
].min_pipeline
;
767 pipeline
= &device
->meta_state
.resolve_compute
.stencil
[samples_log2
].min_pipeline
;
769 case VK_RESOLVE_MODE_MAX_BIT_KHR
:
770 if (aspects
== VK_IMAGE_ASPECT_DEPTH_BIT
)
771 pipeline
= &device
->meta_state
.resolve_compute
.depth
[samples_log2
].max_pipeline
;
773 pipeline
= &device
->meta_state
.resolve_compute
.stencil
[samples_log2
].max_pipeline
;
776 unreachable("invalid resolve mode");
780 int index
= aspects
== VK_IMAGE_ASPECT_DEPTH_BIT
? DEPTH_RESOLVE
: STENCIL_RESOLVE
;
783 ret
= create_depth_stencil_resolve_pipeline(device
, samples
,
786 if (ret
!= VK_SUCCESS
) {
787 cmd_buffer
->record_result
= ret
;
792 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer
),
793 VK_PIPELINE_BIND_POINT_COMPUTE
, *pipeline
);
795 unsigned push_constants
[4] = {
801 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer
),
802 device
->meta_state
.resolve_compute
.p_layout
,
803 VK_SHADER_STAGE_COMPUTE_BIT
, 0, 16,
805 radv_unaligned_dispatch(cmd_buffer
, resolve_extent
->width
, resolve_extent
->height
, 1);
809 void radv_meta_resolve_compute_image(struct radv_cmd_buffer
*cmd_buffer
,
810 struct radv_image
*src_image
,
812 VkImageLayout src_image_layout
,
813 struct radv_image
*dest_image
,
814 VkFormat dest_format
,
815 VkImageLayout dest_image_layout
,
816 uint32_t region_count
,
817 const VkImageResolve
*regions
)
819 struct radv_meta_saved_state saved_state
;
821 radv_decompress_resolve_src(cmd_buffer
, src_image
, src_image_layout
,
822 region_count
, regions
);
824 radv_meta_save(&saved_state
, cmd_buffer
,
825 RADV_META_SAVE_COMPUTE_PIPELINE
|
826 RADV_META_SAVE_CONSTANTS
|
827 RADV_META_SAVE_DESCRIPTORS
);
829 for (uint32_t r
= 0; r
< region_count
; ++r
) {
830 const VkImageResolve
*region
= ®ions
[r
];
832 assert(region
->srcSubresource
.aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
);
833 assert(region
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
);
834 assert(region
->srcSubresource
.layerCount
== region
->dstSubresource
.layerCount
);
836 const uint32_t src_base_layer
=
837 radv_meta_get_iview_layer(src_image
, ®ion
->srcSubresource
,
840 const uint32_t dest_base_layer
=
841 radv_meta_get_iview_layer(dest_image
, ®ion
->dstSubresource
,
844 const struct VkExtent3D extent
=
845 radv_sanitize_image_extent(src_image
->type
, region
->extent
);
846 const struct VkOffset3D srcOffset
=
847 radv_sanitize_image_offset(src_image
->type
, region
->srcOffset
);
848 const struct VkOffset3D dstOffset
=
849 radv_sanitize_image_offset(dest_image
->type
, region
->dstOffset
);
851 for (uint32_t layer
= 0; layer
< region
->srcSubresource
.layerCount
;
854 struct radv_image_view src_iview
;
855 radv_image_view_init(&src_iview
, cmd_buffer
->device
,
856 &(VkImageViewCreateInfo
) {
857 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
858 .image
= radv_image_to_handle(src_image
),
859 .viewType
= radv_meta_get_view_type(src_image
),
860 .format
= src_format
,
861 .subresourceRange
= {
862 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
863 .baseMipLevel
= region
->srcSubresource
.mipLevel
,
865 .baseArrayLayer
= src_base_layer
+ layer
,
870 struct radv_image_view dest_iview
;
871 radv_image_view_init(&dest_iview
, cmd_buffer
->device
,
872 &(VkImageViewCreateInfo
) {
873 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
874 .image
= radv_image_to_handle(dest_image
),
875 .viewType
= radv_meta_get_view_type(dest_image
),
876 .format
= vk_to_non_srgb_format(dest_format
),
877 .subresourceRange
= {
878 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
879 .baseMipLevel
= region
->dstSubresource
.mipLevel
,
881 .baseArrayLayer
= dest_base_layer
+ layer
,
886 emit_resolve(cmd_buffer
,
889 &(VkOffset2D
) {srcOffset
.x
, srcOffset
.y
},
890 &(VkOffset2D
) {dstOffset
.x
, dstOffset
.y
},
891 &(VkExtent2D
) {extent
.width
, extent
.height
});
894 radv_meta_restore(&saved_state
, cmd_buffer
);
898 * Emit any needed resolves for the current subpass.
901 radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer
*cmd_buffer
)
903 struct radv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
904 const struct radv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
905 struct radv_subpass_barrier barrier
;
906 uint32_t layer_count
= fb
->layers
;
908 if (subpass
->view_mask
)
909 layer_count
= util_last_bit(subpass
->view_mask
);
911 /* Resolves happen before the end-of-subpass barriers get executed, so
912 * we have to make the attachment shader-readable.
914 barrier
.src_stage_mask
= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
;
915 barrier
.src_access_mask
= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
;
916 barrier
.dst_access_mask
= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
;
917 radv_subpass_barrier(cmd_buffer
, &barrier
);
919 for (uint32_t i
= 0; i
< subpass
->color_count
; ++i
) {
920 struct radv_subpass_attachment src_att
= subpass
->color_attachments
[i
];
921 struct radv_subpass_attachment dst_att
= subpass
->resolve_attachments
[i
];
923 if (dst_att
.attachment
== VK_ATTACHMENT_UNUSED
)
926 struct radv_image_view
*src_iview
= cmd_buffer
->state
.attachments
[src_att
.attachment
].iview
;
927 struct radv_image_view
*dst_iview
= cmd_buffer
->state
.attachments
[dst_att
.attachment
].iview
;
929 VkImageResolve region
= {
930 .extent
= (VkExtent3D
){ fb
->width
, fb
->height
, 0 },
931 .srcSubresource
= (VkImageSubresourceLayers
) {
932 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
933 .mipLevel
= src_iview
->base_mip
,
934 .baseArrayLayer
= src_iview
->base_layer
,
935 .layerCount
= layer_count
,
937 .dstSubresource
= (VkImageSubresourceLayers
) {
938 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
939 .mipLevel
= dst_iview
->base_mip
,
940 .baseArrayLayer
= dst_iview
->base_layer
,
941 .layerCount
= layer_count
,
943 .srcOffset
= (VkOffset3D
){ 0, 0, 0 },
944 .dstOffset
= (VkOffset3D
){ 0, 0, 0 },
947 radv_meta_resolve_compute_image(cmd_buffer
,
949 src_iview
->vk_format
,
952 dst_iview
->vk_format
,
957 cmd_buffer
->state
.flush_bits
|= RADV_CMD_FLAG_CS_PARTIAL_FLUSH
|
958 RADV_CMD_FLAG_INV_VCACHE
;
962 radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer
*cmd_buffer
,
963 VkImageAspectFlags aspects
,
964 VkResolveModeFlagBits resolve_mode
)
966 struct radv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
967 const struct radv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
968 struct radv_meta_saved_state saved_state
;
969 struct radv_subpass_barrier barrier
;
970 uint32_t layer_count
= fb
->layers
;
972 if (subpass
->view_mask
)
973 layer_count
= util_last_bit(subpass
->view_mask
);
975 /* Resolves happen before the end-of-subpass barriers get executed, so
976 * we have to make the attachment shader-readable.
978 barrier
.src_stage_mask
= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
;
979 barrier
.src_access_mask
= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
;
980 barrier
.dst_access_mask
= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
;
981 radv_subpass_barrier(cmd_buffer
, &barrier
);
983 radv_decompress_resolve_subpass_src(cmd_buffer
);
985 radv_meta_save(&saved_state
, cmd_buffer
,
986 RADV_META_SAVE_COMPUTE_PIPELINE
|
987 RADV_META_SAVE_CONSTANTS
|
988 RADV_META_SAVE_DESCRIPTORS
);
990 struct radv_subpass_attachment src_att
= *subpass
->depth_stencil_attachment
;
991 struct radv_subpass_attachment dest_att
= *subpass
->ds_resolve_attachment
;
993 struct radv_image_view
*src_iview
=
994 cmd_buffer
->state
.attachments
[src_att
.attachment
].iview
;
995 struct radv_image_view
*dst_iview
=
996 cmd_buffer
->state
.attachments
[dest_att
.attachment
].iview
;
998 struct radv_image
*src_image
= src_iview
->image
;
999 struct radv_image
*dst_image
= dst_iview
->image
;
1001 for (uint32_t layer
= 0; layer
< layer_count
; layer
++) {
1002 struct radv_image_view tsrc_iview
;
1003 radv_image_view_init(&tsrc_iview
, cmd_buffer
->device
,
1004 &(VkImageViewCreateInfo
) {
1005 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
1006 .image
= radv_image_to_handle(src_image
),
1007 .viewType
= radv_meta_get_view_type(src_image
),
1008 .format
= src_iview
->vk_format
,
1009 .subresourceRange
= {
1010 .aspectMask
= aspects
,
1011 .baseMipLevel
= src_iview
->base_mip
,
1013 .baseArrayLayer
= src_iview
->base_layer
+ layer
,
1018 struct radv_image_view tdst_iview
;
1019 radv_image_view_init(&tdst_iview
, cmd_buffer
->device
,
1020 &(VkImageViewCreateInfo
) {
1021 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
1022 .image
= radv_image_to_handle(dst_image
),
1023 .viewType
= radv_meta_get_view_type(dst_image
),
1024 .format
= dst_iview
->vk_format
,
1025 .subresourceRange
= {
1026 .aspectMask
= aspects
,
1027 .baseMipLevel
= dst_iview
->base_mip
,
1029 .baseArrayLayer
= dst_iview
->base_layer
+ layer
,
1034 emit_depth_stencil_resolve(cmd_buffer
, &tsrc_iview
, &tdst_iview
,
1035 &(VkOffset2D
) { 0, 0 },
1036 &(VkOffset2D
) { 0, 0 },
1037 &(VkExtent2D
) { fb
->width
, fb
->height
},
1042 cmd_buffer
->state
.flush_bits
|= RADV_CMD_FLAG_CS_PARTIAL_FLUSH
|
1043 RADV_CMD_FLAG_INV_VCACHE
;
1045 if (radv_image_has_htile(dst_image
)) {
1046 if (aspects
== VK_IMAGE_ASPECT_DEPTH_BIT
) {
1047 VkImageSubresourceRange range
= {};
1048 range
.aspectMask
= VK_IMAGE_ASPECT_DEPTH_BIT
| VK_IMAGE_ASPECT_STENCIL_BIT
;
1049 range
.baseMipLevel
= dst_iview
->base_mip
;
1050 range
.levelCount
= 1;
1051 range
.baseArrayLayer
= dst_iview
->base_layer
;
1052 range
.layerCount
= layer_count
;
1054 uint32_t clear_value
= 0xfffc000f;
1056 if (vk_format_is_stencil(dst_image
->vk_format
) &&
1057 subpass
->stencil_resolve_mode
!= VK_RESOLVE_MODE_NONE_KHR
) {
1058 /* Only clear the stencil part of the HTILE
1059 * buffer if it's resolved, otherwise this
1060 * might break if the stencil has been cleared.
1062 clear_value
= 0xfffff30f;
1065 cmd_buffer
->state
.flush_bits
|=
1066 radv_clear_htile(cmd_buffer
, dst_image
, &range
,
1071 radv_meta_restore(&saved_state
, cmd_buffer
);