2 * Copyright © 2016 Dave Airlie
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 #include "radv_meta.h"
29 #include "radv_private.h"
30 #include "nir/nir_builder.h"
32 #include "vk_format.h"
/*
 * build_resolve_compute_shader: builds a NIR shader for the
 * MESA_SHADER_COMPUTE stage that fetches samples of a multisampled texture
 * with txf_ms and writes one value per invocation with image_store.
 *
 * dev        - not referenced in the lines visible here.
 * is_integer - selects the "int"/"float" suffix of the shader name; when it
 *              is true the multi-sample averaging path below is not emitted.
 * samples    - sample count; used in the shader name, as the loop bound for
 *              the per-sample fetches, and as the divisor of the sum.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers (return
 * type, braces, some declarations and the return statement are not visible).
 * Confirm details against the upstream file before relying on them.
 */
35 build_resolve_compute_shader(struct radv_device
*dev
, bool is_integer
, int samples
)
/* Starts out NULL; used near the end to reposition the builder cursor. */
39 nir_if
*outer_if
= NULL
;
/* Multisampled sampler type for the source; remaining arguments are not
 * visible in this listing. */
40 const struct glsl_type
*sampler_type
= glsl_sampler_type(GLSL_SAMPLER_DIM_MS
,
/* 2D type for the destination image; remaining arguments are not visible. */
44 const struct glsl_type
*img_type
= glsl_sampler_type(GLSL_SAMPLER_DIM_2D
,
/* Shader name encodes the sample count and int/float variant. */
48 snprintf(name
, 64, "meta_resolve_cs-%d-%s", samples
, is_integer
? "int" : "float");
49 nir_builder_init_simple_shader(&b
, NULL
, MESA_SHADER_COMPUTE
, NULL
);
50 b
.shader
->info
->name
= ralloc_strdup(b
.shader
, name
);
/* Workgroup size is 16 x 16 x 1. */
51 b
.shader
->info
->cs
.local_size
[0] = 16;
52 b
.shader
->info
->cs
.local_size
[1] = 16;
53 b
.shader
->info
->cs
.local_size
[2] = 1;
/* Source texture variable: descriptor set 0, binding 0. */
55 nir_variable
*input_img
= nir_variable_create(b
.shader
, nir_var_uniform
,
56 sampler_type
, "s_tex");
57 input_img
->data
.descriptor_set
= 0;
58 input_img
->data
.binding
= 0;
/* Destination image variable: descriptor set 0, binding 1. */
60 nir_variable
*output_img
= nir_variable_create(b
.shader
, nir_var_uniform
,
62 output_img
->data
.descriptor_set
= 0;
63 output_img
->data
.binding
= 1;
/* global_id = wg_id * block_size + invoc_id (computed below). */
64 nir_ssa_def
*invoc_id
= nir_load_system_value(&b
, nir_intrinsic_load_local_invocation_id
, 0);
65 nir_ssa_def
*wg_id
= nir_load_system_value(&b
, nir_intrinsic_load_work_group_id
, 0);
66 nir_ssa_def
*block_size
= nir_imm_ivec4(&b
,
67 b
.shader
->info
->cs
.local_size
[0],
68 b
.shader
->info
->cs
.local_size
[1],
69 b
.shader
->info
->cs
.local_size
[2], 0);
71 nir_ssa_def
*global_id
= nir_iadd(&b
, nir_imul(&b
, wg_id
, block_size
), invoc_id
);
/* Source offset: 2-component, 32-bit push-constant load at offset 0. */
73 nir_intrinsic_instr
*src_offset
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
74 src_offset
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
75 src_offset
->num_components
= 2;
76 nir_ssa_dest_init(&src_offset
->instr
, &src_offset
->dest
, 2, 32, "src_offset");
77 nir_builder_instr_insert(&b
, &src_offset
->instr
);
/* Destination offset: 2-component, 32-bit push-constant load at offset 8. */
79 nir_intrinsic_instr
*dst_offset
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
80 dst_offset
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 8));
81 dst_offset
->num_components
= 2;
82 nir_ssa_dest_init(&dst_offset
->instr
, &dst_offset
->dest
, 2, 32, "dst_offset");
83 nir_builder_instr_insert(&b
, &dst_offset
->instr
);
/* Source texel coordinate = global_id + src_offset. */
85 nir_ssa_def
*img_coord
= nir_iadd(&b
, global_id
, &src_offset
->dest
.ssa
);
86 /* do a txf_ms on each sample */
/* Fetch of sample index 0; this is the only fetch emitted unconditionally. */
89 nir_tex_instr
*tex
= nir_tex_instr_create(b
.shader
, 2);
90 tex
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
91 tex
->op
= nir_texop_txf_ms
;
92 tex
->src
[0].src_type
= nir_tex_src_coord
;
93 tex
->src
[0].src
= nir_src_for_ssa(img_coord
);
94 tex
->src
[1].src_type
= nir_tex_src_ms_index
;
95 tex
->src
[1].src
= nir_src_for_ssa(nir_imm_int(&b
, 0));
96 tex
->dest_type
= nir_type_float
;
97 tex
->is_array
= false;
98 tex
->coord_components
= 2;
99 tex
->texture
= nir_deref_var_create(tex
, input_img
);
102 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
, 4, 32, "tex");
103 nir_builder_instr_insert(&b
, &tex
->instr
);
/* Running sum starts as the value of sample 0. The declaration of tmp is
 * not visible in this listing. */
105 tmp
= &tex
->dest
.ssa
;
/* Local vec4 variable that carries the result to the final image store. */
106 nir_variable
*color
=
107 nir_local_variable_create(b
.impl
, glsl_vec4_type(), "color");
/* Averaging path: only for non-integer variants with more than one sample. */
109 if (!is_integer
&& samples
> 1) {
/* Emit a samples_identical query at the same coordinate. */
110 nir_tex_instr
*tex_all_same
= nir_tex_instr_create(b
.shader
, 1);
111 tex_all_same
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
112 tex_all_same
->op
= nir_texop_samples_identical
;
113 tex_all_same
->src
[0].src_type
= nir_tex_src_coord
;
114 tex_all_same
->src
[0].src
= nir_src_for_ssa(img_coord
);
115 tex_all_same
->dest_type
= nir_type_float
;
116 tex_all_same
->is_array
= false;
117 tex_all_same
->coord_components
= 2;
118 tex_all_same
->texture
= nir_deref_var_create(tex_all_same
, input_img
);
119 tex_all_same
->sampler
= NULL
;
121 nir_ssa_dest_init(&tex_all_same
->instr
, &tex_all_same
->dest
, 1, 32, "tex");
122 nir_builder_instr_insert(&b
, &tex_all_same
->instr
);
/* Branch condition is (query result != 0).
 * NOTE(review): the value is named all_same, yet the then-list below is
 * where the remaining samples are summed. Whether this sense is correct
 * depends on what samples_identical yields in this backend; confirm. */
124 nir_ssa_def
*all_same
= nir_ine(&b
, &tex_all_same
->dest
.ssa
, nir_imm_int(&b
, 0));
125 nir_if
*if_stmt
= nir_if_create(b
.shader
);
126 if_stmt
->condition
= nir_src_for_ssa(all_same
);
127 nir_cf_node_insert(b
.cursor
, &if_stmt
->cf_node
);
/* Then-list: fetch samples 1..samples-1 and add each to the running sum. */
129 b
.cursor
= nir_after_cf_list(&if_stmt
->then_list
);
130 for (int i
= 1; i
< samples
; i
++) {
131 nir_tex_instr
*tex_add
= nir_tex_instr_create(b
.shader
, 2);
132 tex_add
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
133 tex_add
->op
= nir_texop_txf_ms
;
134 tex_add
->src
[0].src_type
= nir_tex_src_coord
;
135 tex_add
->src
[0].src
= nir_src_for_ssa(img_coord
);
136 tex_add
->src
[1].src_type
= nir_tex_src_ms_index
;
137 tex_add
->src
[1].src
= nir_src_for_ssa(nir_imm_int(&b
, i
));
138 tex_add
->dest_type
= nir_type_float
;
139 tex_add
->is_array
= false;
140 tex_add
->coord_components
= 2;
141 tex_add
->texture
= nir_deref_var_create(tex_add
, input_img
);
142 tex_add
->sampler
= NULL
;
144 nir_ssa_dest_init(&tex_add
->instr
, &tex_add
->dest
, 4, 32, "tex");
145 nir_builder_instr_insert(&b
, &tex_add
->instr
);
147 tmp
= nir_fadd(&b
, tmp
, &tex_add
->dest
.ssa
);
/* Divide the sum by the sample count and store the average into color. */
150 tmp
= nir_fdiv(&b
, tmp
, nir_imm_float(&b
, samples
));
151 nir_store_var(&b
, color
, tmp
, 0xf);
/* Move the cursor to the else-list so the next store is emitted there. */
152 b
.cursor
= nir_after_cf_list(&if_stmt
->else_list
);
/* Store sample 0's value into color (all four components). The closing
 * brace of the C `if` above is not visible in this listing; presumably this
 * store sits after it — confirm. */
155 nir_store_var(&b
, color
, &tex
->dest
.ssa
, 0xf);
/* NOTE(review): outer_if is initialized to NULL and neither an assignment
 * to it nor a NULL check is visible here (embedded lines 153 and 157 are
 * absent). Confirm upstream guards this dereference. */
158 b
.cursor
= nir_after_cf_node(&outer_if
->cf_node
);
/* Load the result and write it at global_id + dst_offset. */
160 nir_ssa_def
*newv
= nir_load_var(&b
, color
);
161 nir_ssa_def
*coord
= nir_iadd(&b
, global_id
, &dst_offset
->dest
.ssa
);
/* image_store sources: [0] coordinate, [1] an undefined 1-component 32-bit
 * value, [2] the value to write; the target is output_img. */
162 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_image_store
);
163 store
->src
[0] = nir_src_for_ssa(coord
);
164 store
->src
[1] = nir_src_for_ssa(nir_ssa_undef(&b
, 1, 32));
165 store
->src
[2] = nir_src_for_ssa(newv
);
166 store
->variables
[0] = nir_deref_var_create(store
, output_img
);
167 nir_builder_instr_insert(&b
, &store
->instr
);
/*
 * create_layout: creates the descriptor set layout and the pipeline layout
 * used by the compute resolve path and stores them in
 * device->meta_state.resolve_compute.ds_layout and .p_layout.
 *
 * device - device whose meta_state receives the two layouts; its
 *          meta_state.alloc is passed to both create calls.
 *
 * Each create call's result is compared with VK_SUCCESS.
 * NOTE(review): the return type, the statements taken on failure and the
 * final return are not visible in this listing.
 */
173 create_layout(struct radv_device
*device
)
177 * two descriptors one for the image being sampled
178 * one for the buffer being written.
/* Two bindings, both for the compute stage with descriptorCount 1:
 * a SAMPLED_IMAGE followed by a STORAGE_IMAGE. The .binding indices are
 * not visible here. */
180 VkDescriptorSetLayoutCreateInfo ds_create_info
= {
181 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO
,
183 .pBindings
= (VkDescriptorSetLayoutBinding
[]) {
186 .descriptorType
= VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE
,
187 .descriptorCount
= 1,
188 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
189 .pImmutableSamplers
= NULL
193 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
,
194 .descriptorCount
= 1,
195 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
196 .pImmutableSamplers
= NULL
201 result
= radv_CreateDescriptorSetLayout(radv_device_to_handle(device
),
203 &device
->meta_state
.alloc
,
204 &device
->meta_state
.resolve_compute
.ds_layout
);
205 if (result
!= VK_SUCCESS
)
/* Pipeline layout: the set layout created above plus one compute-stage
 * push-constant range at offset 0 of size 16 bytes. */
209 VkPipelineLayoutCreateInfo pl_create_info
= {
210 .sType
= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
,
212 .pSetLayouts
= &device
->meta_state
.resolve_compute
.ds_layout
,
213 .pushConstantRangeCount
= 1,
214 .pPushConstantRanges
= &(VkPushConstantRange
){VK_SHADER_STAGE_COMPUTE_BIT
, 0, 16},
217 result
= radv_CreatePipelineLayout(radv_device_to_handle(device
),
219 &device
->meta_state
.alloc
,
220 &device
->meta_state
.resolve_compute
.p_layout
);
221 if (result
!= VK_SUCCESS
)
/*
 * create_resolve_pipeline: builds the resolve compute shader for one
 * (samples, is_integer) combination and creates a compute pipeline from it.
 *
 * device   - supplies meta_state.resolve_compute.p_layout as the pipeline
 *            layout and meta_state.cache as the pipeline cache.
 * pipeline - VkPipeline pointer; presumably the output handle of the create
 *            call, but that argument line is not visible here — confirm.
 *
 * NOTE(review): the `samples` and `is_integer` parameters are used below but
 * their declarations, the return type, and whatever happens after the
 * VK_SUCCESS check (including any release of cs.nir) are not visible in
 * this listing.
 */
229 create_resolve_pipeline(struct radv_device
*device
,
232 VkPipeline
*pipeline
)
/* Stack-allocated shader module wrapping the generated NIR. */
235 struct radv_shader_module cs
= { .nir
= NULL
};
237 cs
.nir
= build_resolve_compute_shader(device
, is_integer
, samples
);
241 VkPipelineShaderStageCreateInfo pipeline_shader_stage
= {
242 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
243 .stage
= VK_SHADER_STAGE_COMPUTE_BIT
,
244 .module
= radv_shader_module_to_handle(&cs
),
246 .pSpecializationInfo
= NULL
,
249 VkComputePipelineCreateInfo vk_pipeline_info
= {
250 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
251 .stage
= pipeline_shader_stage
,
253 .layout
= device
->meta_state
.resolve_compute
.p_layout
,
/* Create exactly one compute pipeline through the meta pipeline cache. */
256 result
= radv_CreateComputePipelines(radv_device_to_handle(device
),
257 radv_pipeline_cache_to_handle(&device
->meta_state
.cache
),
258 1, &vk_pipeline_info
, NULL
,
260 if (result
!= VK_SUCCESS
)
/*
 * radv_device_init_meta_resolve_compute_state: zeroes
 * device->meta_state.resolve_compute, creates the layouts via
 * create_layout(), then creates two pipelines for each index i below
 * MAX_SAMPLES_LOG2 (sample count 1 << i): rc[i].pipeline with
 * is_integer == false and rc[i].i_pipeline with is_integer == true.
 *
 * NOTE(review): the return type, the declaration of `res`, and the return
 * statements are not visible in this listing.
 */
271 radv_device_init_meta_resolve_compute_state(struct radv_device
*device
)
273 struct radv_meta_state
*state
= &device
->meta_state
;
/* Start from an all-zero state. */
275 memset(&device
->meta_state
.resolve_compute
, 0, sizeof(device
->meta_state
.resolve_compute
));
277 res
= create_layout(device
);
278 if (res
!= VK_SUCCESS
)
281 for (uint32_t i
= 0; i
< MAX_SAMPLES_LOG2
; ++i
) {
282 uint32_t samples
= 1 << i
;
/* Non-integer variant. */
284 res
= create_resolve_pipeline(device
, samples
, false,
285 &state
->resolve_compute
.rc
[i
].pipeline
);
/* Integer variant.
 * NOTE(review): in the visible lines `res` from the call above is
 * overwritten here without being checked, so a failure of the first call
 * would be lost. A check may exist on a line missing from this listing;
 * confirm upstream. */
287 res
= create_resolve_pipeline(device
, samples
, true,
288 &state
->resolve_compute
.rc
[i
].i_pipeline
);
/*
 * radv_device_finish_meta_resolve_compute_state: destroys the objects held
 * in device->meta_state.resolve_compute — rc[i].pipeline and
 * rc[i].i_pipeline for every i below MAX_SAMPLES_LOG2, then the descriptor
 * set layout and the pipeline layout.
 *
 * NOTE(review): the last argument of each destroy call (presumably the
 * allocator) is not visible in this listing.
 */
296 radv_device_finish_meta_resolve_compute_state(struct radv_device
*device
)
298 struct radv_meta_state
*state
= &device
->meta_state
;
299 for (uint32_t i
= 0; i
< MAX_SAMPLES_LOG2
; ++i
) {
300 radv_DestroyPipeline(radv_device_to_handle(device
),
301 state
->resolve_compute
.rc
[i
].pipeline
,
304 radv_DestroyPipeline(radv_device_to_handle(device
),
305 state
->resolve_compute
.rc
[i
].i_pipeline
,
/* Layouts are destroyed after the pipeline destroy calls above. */
309 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device
),
310 state
->resolve_compute
.ds_layout
,
312 radv_DestroyPipelineLayout(radv_device_to_handle(device
),
313 state
->resolve_compute
.p_layout
,
/*
 * radv_meta_resolve_compute_image: records compute dispatches on cmd_buffer
 * that resolve regions of src_image into dest_image.
 *
 * cmd_buffer        - command buffer being recorded; its compute state is
 *                     saved before the loop and restored after it.
 * src_image         - source image; its `samples` field selects the
 *                     pipeline and its vk_format selects int vs. non-int.
 * src_image_layout  - not referenced in the lines visible here.
 * dest_image        - destination image, bound as a storage image.
 * dest_image_layout - not referenced in the lines visible here.
 * region_count      - number of entries in `regions`.
 * regions           - resolve regions; each must use the color aspect on
 *                     both sides and equal layer counts (asserted).
 *
 * NOTE(review): this listing has gaps in its embedded line numbers; several
 * declarations, call arguments and braces are not visible. In addition,
 * `®ions` / `®ion` below look like a mis-encoding of `&regions` /
 * `&region` (HTML entity `&reg`); confirm against the upstream file.
 */
317 void radv_meta_resolve_compute_image(struct radv_cmd_buffer
*cmd_buffer
,
318 struct radv_image
*src_image
,
319 VkImageLayout src_image_layout
,
320 struct radv_image
*dest_image
,
321 VkImageLayout dest_image_layout
,
322 uint32_t region_count
,
323 const VkImageResolve
*regions
)
325 struct radv_device
*device
= cmd_buffer
->device
;
326 struct radv_meta_saved_compute_state saved_state
;
327 const uint32_t samples
= src_image
->samples
;
/* Index into resolve_compute.rc[]: position of the lowest set bit of
 * `samples`, minus one. */
328 const uint32_t samples_log2
= ffs(samples
) - 1;
/* Save compute state; 16 matches the value passed to the restore call at
 * the end and the push-constant size used below. */
329 radv_meta_save_compute(&saved_state
, cmd_buffer
, 16);
331 for (uint32_t r
= 0; r
< region_count
; ++r
) {
332 const VkImageResolve
*region
= ®ions
[r
];
334 assert(region
->srcSubresource
.aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
);
335 assert(region
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
);
336 assert(region
->srcSubresource
.layerCount
== region
->dstSubresource
.layerCount
);
/* Base layers for the per-layer views; the final argument of each call is
 * not visible in this listing. */
338 const uint32_t src_base_layer
=
339 radv_meta_get_iview_layer(src_image
, ®ion
->srcSubresource
,
342 const uint32_t dest_base_layer
=
343 radv_meta_get_iview_layer(dest_image
, ®ion
->dstSubresource
,
/* Extent and offsets are passed through the sanitize helpers together with
 * the image type. */
346 const struct VkExtent3D extent
=
347 radv_sanitize_image_extent(src_image
->type
, region
->extent
);
348 const struct VkOffset3D srcOffset
=
349 radv_sanitize_image_offset(src_image
->type
, region
->srcOffset
);
350 const struct VkOffset3D dstOffset
=
351 radv_sanitize_image_offset(dest_image
->type
, region
->dstOffset
);
/* One dispatch per layer of the region. */
353 for (uint32_t layer
= 0; layer
< region
->srcSubresource
.layerCount
;
/* Source view for this layer, created with SAMPLED usage. */
356 struct radv_image_view src_iview
;
358 radv_image_view_init(&src_iview
, cmd_buffer
->device
,
359 &(VkImageViewCreateInfo
) {
360 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
361 .image
= radv_image_to_handle(src_image
),
362 .viewType
= radv_meta_get_view_type(src_image
),
363 .format
= src_image
->vk_format
,
364 .subresourceRange
= {
365 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
366 .baseMipLevel
= region
->srcSubresource
.mipLevel
,
368 .baseArrayLayer
= src_base_layer
+ layer
,
372 cmd_buffer
, VK_IMAGE_USAGE_SAMPLED_BIT
);
/* Destination view for this layer, created with STORAGE usage. */
374 struct radv_image_view dest_iview
;
375 radv_image_view_init(&dest_iview
, cmd_buffer
->device
,
376 &(VkImageViewCreateInfo
) {
377 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
378 .image
= radv_image_to_handle(dest_image
),
379 .viewType
= radv_meta_get_view_type(dest_image
),
380 .format
= dest_image
->vk_format
,
381 .subresourceRange
= {
382 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
383 .baseMipLevel
= region
->dstSubresource
.mipLevel
,
385 .baseArrayLayer
= dest_base_layer
+ layer
,
389 cmd_buffer
, VK_IMAGE_USAGE_STORAGE_BIT
);
/* Temporary descriptor set using the resolve ds_layout; it is destroyed
 * after the dispatch below. */
392 radv_temp_descriptor_set_create(device
, cmd_buffer
,
393 device
->meta_state
.resolve_compute
.ds_layout
,
/* Two writes: the source view as SAMPLED_IMAGE and the destination view as
 * STORAGE_IMAGE, both with VK_IMAGE_LAYOUT_GENERAL. The dstSet/dstBinding
 * lines are not visible in this listing. */
396 radv_UpdateDescriptorSets(radv_device_to_handle(device
),
398 (VkWriteDescriptorSet
[]) {
400 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
403 .dstArrayElement
= 0,
404 .descriptorCount
= 1,
405 .descriptorType
= VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE
,
406 .pImageInfo
= (VkDescriptorImageInfo
[]) {
409 .imageView
= radv_image_view_to_handle(&src_iview
),
410 .imageLayout
= VK_IMAGE_LAYOUT_GENERAL
,
415 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
418 .dstArrayElement
= 0,
419 .descriptorCount
= 1,
420 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
,
421 .pImageInfo
= (VkDescriptorImageInfo
[]) {
424 .imageView
= radv_image_view_to_handle(&dest_iview
),
425 .imageLayout
= VK_IMAGE_LAYOUT_GENERAL
,
431 radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer
),
432 VK_PIPELINE_BIND_POINT_COMPUTE
,
433 device
->meta_state
.resolve_compute
.p_layout
, 0, 1,
/* Integer source formats use i_pipeline, others use pipeline. The `else`
 * between the two assignments is not visible in this listing. */
437 if (vk_format_is_int(src_image
->vk_format
))
438 pipeline
= device
->meta_state
.resolve_compute
.rc
[samples_log2
].i_pipeline
;
440 pipeline
= device
->meta_state
.resolve_compute
.rc
[samples_log2
].pipeline
;
/* Bind only when the command buffer's current compute pipeline differs. */
441 if (cmd_buffer
->state
.compute_pipeline
!= radv_pipeline_from_handle(pipeline
)) {
442 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer
),
443 VK_PIPELINE_BIND_POINT_COMPUTE
, pipeline
);
/* Four push-constant words (16 bytes pushed at offset 0). Their values are
 * not visible here; presumably the source and destination x/y offsets that
 * the shader reads at byte offsets 0 and 8 — confirm. */
446 unsigned push_constants
[4] = {
452 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer
),
453 device
->meta_state
.resolve_compute
.p_layout
,
454 VK_SHADER_STAGE_COMPUTE_BIT
, 0, 16,
/* Dispatch sized by the region's width x height, depth 1. */
456 radv_unaligned_dispatch(cmd_buffer
, extent
.width
, extent
.height
, 1);
457 radv_temp_descriptor_set_destroy(cmd_buffer
->device
, set
);
/* Restore the compute state saved at the top of the function. */
460 radv_meta_restore_compute(&saved_state
, cmd_buffer
, 16);