/*
 * Copyright © 2016 Dave Airlie
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28 #include "radv_meta.h"
29 #include "radv_private.h"
30 #include "nir/nir_builder.h"
32 #include "vk_format.h"
34 static nir_ssa_def
*radv_meta_build_resolve_srgb_conversion(nir_builder
*b
,
39 v
.u32
[0] = 0x3b4d2e1c; // 0.00313080009
42 for (i
= 0; i
< 3; i
++)
43 cmp
[i
] = nir_flt(b
, nir_channel(b
, input
, i
),
44 nir_build_imm(b
, 1, 32, v
));
46 nir_ssa_def
*ltvals
[3];
48 for (i
= 0; i
< 3; i
++)
49 ltvals
[i
] = nir_fmul(b
, nir_channel(b
, input
, i
),
50 nir_build_imm(b
, 1, 32, v
));
52 nir_ssa_def
*gtvals
[3];
54 for (i
= 0; i
< 3; i
++) {
56 gtvals
[i
] = nir_fpow(b
, nir_channel(b
, input
, i
),
57 nir_build_imm(b
, 1, 32, v
));
59 gtvals
[i
] = nir_fmul(b
, gtvals
[i
],
60 nir_build_imm(b
, 1, 32, v
));
62 gtvals
[i
] = nir_fsub(b
, gtvals
[i
],
63 nir_build_imm(b
, 1, 32, v
));
67 for (i
= 0; i
< 3; i
++)
68 comp
[i
] = nir_bcsel(b
, cmp
[i
], ltvals
[i
], gtvals
[i
]);
69 comp
[3] = nir_channels(b
, input
, 1 << 3);
70 return nir_vec(b
, comp
, 4);
74 build_resolve_compute_shader(struct radv_device
*dev
, bool is_integer
, bool is_srgb
, int samples
)
78 const struct glsl_type
*sampler_type
= glsl_sampler_type(GLSL_SAMPLER_DIM_MS
,
82 const struct glsl_type
*img_type
= glsl_sampler_type(GLSL_SAMPLER_DIM_2D
,
86 snprintf(name
, 64, "meta_resolve_cs-%d-%s", samples
, is_integer
? "int" : (is_srgb
? "srgb" : "float"));
87 nir_builder_init_simple_shader(&b
, NULL
, MESA_SHADER_COMPUTE
, NULL
);
88 b
.shader
->info
.name
= ralloc_strdup(b
.shader
, name
);
89 b
.shader
->info
.cs
.local_size
[0] = 16;
90 b
.shader
->info
.cs
.local_size
[1] = 16;
91 b
.shader
->info
.cs
.local_size
[2] = 1;
93 nir_variable
*input_img
= nir_variable_create(b
.shader
, nir_var_uniform
,
94 sampler_type
, "s_tex");
95 input_img
->data
.descriptor_set
= 0;
96 input_img
->data
.binding
= 0;
98 nir_variable
*output_img
= nir_variable_create(b
.shader
, nir_var_uniform
,
100 output_img
->data
.descriptor_set
= 0;
101 output_img
->data
.binding
= 1;
102 nir_ssa_def
*invoc_id
= nir_load_system_value(&b
, nir_intrinsic_load_local_invocation_id
, 0);
103 nir_ssa_def
*wg_id
= nir_load_system_value(&b
, nir_intrinsic_load_work_group_id
, 0);
104 nir_ssa_def
*block_size
= nir_imm_ivec4(&b
,
105 b
.shader
->info
.cs
.local_size
[0],
106 b
.shader
->info
.cs
.local_size
[1],
107 b
.shader
->info
.cs
.local_size
[2], 0);
109 nir_ssa_def
*global_id
= nir_iadd(&b
, nir_imul(&b
, wg_id
, block_size
), invoc_id
);
111 nir_intrinsic_instr
*src_offset
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
112 nir_intrinsic_set_base(src_offset
, 0);
113 nir_intrinsic_set_range(src_offset
, 16);
114 src_offset
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
115 src_offset
->num_components
= 2;
116 nir_ssa_dest_init(&src_offset
->instr
, &src_offset
->dest
, 2, 32, "src_offset");
117 nir_builder_instr_insert(&b
, &src_offset
->instr
);
119 nir_intrinsic_instr
*dst_offset
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
120 nir_intrinsic_set_base(dst_offset
, 0);
121 nir_intrinsic_set_range(dst_offset
, 16);
122 dst_offset
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 8));
123 dst_offset
->num_components
= 2;
124 nir_ssa_dest_init(&dst_offset
->instr
, &dst_offset
->dest
, 2, 32, "dst_offset");
125 nir_builder_instr_insert(&b
, &dst_offset
->instr
);
127 nir_ssa_def
*img_coord
= nir_channels(&b
, nir_iadd(&b
, global_id
, &src_offset
->dest
.ssa
), 0x3);
128 nir_variable
*color
= nir_local_variable_create(b
.impl
, glsl_vec4_type(), "color");
130 radv_meta_build_resolve_shader_core(&b
, is_integer
, samples
, input_img
,
133 nir_ssa_def
*outval
= nir_load_var(&b
, color
);
135 outval
= radv_meta_build_resolve_srgb_conversion(&b
, outval
);
137 nir_ssa_def
*coord
= nir_iadd(&b
, global_id
, &dst_offset
->dest
.ssa
);
138 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_image_deref_store
);
139 store
->src
[0] = nir_src_for_ssa(&nir_build_deref_var(&b
, output_img
)->dest
.ssa
);
140 store
->src
[1] = nir_src_for_ssa(coord
);
141 store
->src
[2] = nir_src_for_ssa(nir_ssa_undef(&b
, 1, 32));
142 store
->src
[3] = nir_src_for_ssa(outval
);
143 nir_builder_instr_insert(&b
, &store
->instr
);
149 create_layout(struct radv_device
*device
)
153 * two descriptors one for the image being sampled
154 * one for the buffer being written.
156 VkDescriptorSetLayoutCreateInfo ds_create_info
= {
157 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO
,
158 .flags
= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR
,
160 .pBindings
= (VkDescriptorSetLayoutBinding
[]) {
163 .descriptorType
= VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE
,
164 .descriptorCount
= 1,
165 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
166 .pImmutableSamplers
= NULL
170 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
,
171 .descriptorCount
= 1,
172 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
173 .pImmutableSamplers
= NULL
178 result
= radv_CreateDescriptorSetLayout(radv_device_to_handle(device
),
180 &device
->meta_state
.alloc
,
181 &device
->meta_state
.resolve_compute
.ds_layout
);
182 if (result
!= VK_SUCCESS
)
186 VkPipelineLayoutCreateInfo pl_create_info
= {
187 .sType
= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
,
189 .pSetLayouts
= &device
->meta_state
.resolve_compute
.ds_layout
,
190 .pushConstantRangeCount
= 1,
191 .pPushConstantRanges
= &(VkPushConstantRange
){VK_SHADER_STAGE_COMPUTE_BIT
, 0, 16},
194 result
= radv_CreatePipelineLayout(radv_device_to_handle(device
),
196 &device
->meta_state
.alloc
,
197 &device
->meta_state
.resolve_compute
.p_layout
);
198 if (result
!= VK_SUCCESS
)
206 create_resolve_pipeline(struct radv_device
*device
,
210 VkPipeline
*pipeline
)
213 struct radv_shader_module cs
= { .nir
= NULL
};
215 mtx_lock(&device
->meta_state
.mtx
);
217 mtx_unlock(&device
->meta_state
.mtx
);
221 cs
.nir
= build_resolve_compute_shader(device
, is_integer
, is_srgb
, samples
);
225 VkPipelineShaderStageCreateInfo pipeline_shader_stage
= {
226 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
227 .stage
= VK_SHADER_STAGE_COMPUTE_BIT
,
228 .module
= radv_shader_module_to_handle(&cs
),
230 .pSpecializationInfo
= NULL
,
233 VkComputePipelineCreateInfo vk_pipeline_info
= {
234 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
235 .stage
= pipeline_shader_stage
,
237 .layout
= device
->meta_state
.resolve_compute
.p_layout
,
240 result
= radv_CreateComputePipelines(radv_device_to_handle(device
),
241 radv_pipeline_cache_to_handle(&device
->meta_state
.cache
),
242 1, &vk_pipeline_info
, NULL
,
244 if (result
!= VK_SUCCESS
)
248 mtx_unlock(&device
->meta_state
.mtx
);
252 mtx_unlock(&device
->meta_state
.mtx
);
257 radv_device_init_meta_resolve_compute_state(struct radv_device
*device
, bool on_demand
)
259 struct radv_meta_state
*state
= &device
->meta_state
;
262 res
= create_layout(device
);
263 if (res
!= VK_SUCCESS
)
269 for (uint32_t i
= 0; i
< MAX_SAMPLES_LOG2
; ++i
) {
270 uint32_t samples
= 1 << i
;
272 res
= create_resolve_pipeline(device
, samples
, false, false,
273 &state
->resolve_compute
.rc
[i
].pipeline
);
274 if (res
!= VK_SUCCESS
)
277 res
= create_resolve_pipeline(device
, samples
, true, false,
278 &state
->resolve_compute
.rc
[i
].i_pipeline
);
279 if (res
!= VK_SUCCESS
)
282 res
= create_resolve_pipeline(device
, samples
, false, true,
283 &state
->resolve_compute
.rc
[i
].srgb_pipeline
);
284 if (res
!= VK_SUCCESS
)
291 radv_device_finish_meta_resolve_compute_state(device
);
296 radv_device_finish_meta_resolve_compute_state(struct radv_device
*device
)
298 struct radv_meta_state
*state
= &device
->meta_state
;
299 for (uint32_t i
= 0; i
< MAX_SAMPLES_LOG2
; ++i
) {
300 radv_DestroyPipeline(radv_device_to_handle(device
),
301 state
->resolve_compute
.rc
[i
].pipeline
,
304 radv_DestroyPipeline(radv_device_to_handle(device
),
305 state
->resolve_compute
.rc
[i
].i_pipeline
,
308 radv_DestroyPipeline(radv_device_to_handle(device
),
309 state
->resolve_compute
.rc
[i
].srgb_pipeline
,
313 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device
),
314 state
->resolve_compute
.ds_layout
,
316 radv_DestroyPipelineLayout(radv_device_to_handle(device
),
317 state
->resolve_compute
.p_layout
,
322 emit_resolve(struct radv_cmd_buffer
*cmd_buffer
,
323 struct radv_image_view
*src_iview
,
324 struct radv_image_view
*dest_iview
,
325 const VkOffset2D
*src_offset
,
326 const VkOffset2D
*dest_offset
,
327 const VkExtent2D
*resolve_extent
)
329 struct radv_device
*device
= cmd_buffer
->device
;
330 const uint32_t samples
= src_iview
->image
->info
.samples
;
331 const uint32_t samples_log2
= ffs(samples
) - 1;
332 radv_meta_push_descriptor_set(cmd_buffer
,
333 VK_PIPELINE_BIND_POINT_COMPUTE
,
334 device
->meta_state
.resolve_compute
.p_layout
,
336 2, /* descriptorWriteCount */
337 (VkWriteDescriptorSet
[]) {
339 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
341 .dstArrayElement
= 0,
342 .descriptorCount
= 1,
343 .descriptorType
= VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE
,
344 .pImageInfo
= (VkDescriptorImageInfo
[]) {
346 .sampler
= VK_NULL_HANDLE
,
347 .imageView
= radv_image_view_to_handle(src_iview
),
348 .imageLayout
= VK_IMAGE_LAYOUT_GENERAL
},
352 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
354 .dstArrayElement
= 0,
355 .descriptorCount
= 1,
356 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
,
357 .pImageInfo
= (VkDescriptorImageInfo
[]) {
359 .sampler
= VK_NULL_HANDLE
,
360 .imageView
= radv_image_view_to_handle(dest_iview
),
361 .imageLayout
= VK_IMAGE_LAYOUT_GENERAL
,
367 VkPipeline
*pipeline
;
368 if (vk_format_is_int(src_iview
->image
->vk_format
))
369 pipeline
= &device
->meta_state
.resolve_compute
.rc
[samples_log2
].i_pipeline
;
370 else if (vk_format_is_srgb(src_iview
->image
->vk_format
))
371 pipeline
= &device
->meta_state
.resolve_compute
.rc
[samples_log2
].srgb_pipeline
;
373 pipeline
= &device
->meta_state
.resolve_compute
.rc
[samples_log2
].pipeline
;
376 VkResult ret
= create_resolve_pipeline(device
, samples
,
377 vk_format_is_int(src_iview
->image
->vk_format
),
378 vk_format_is_srgb(src_iview
->image
->vk_format
),
380 if (ret
!= VK_SUCCESS
) {
381 cmd_buffer
->record_result
= ret
;
386 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer
),
387 VK_PIPELINE_BIND_POINT_COMPUTE
, *pipeline
);
389 unsigned push_constants
[4] = {
395 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer
),
396 device
->meta_state
.resolve_compute
.p_layout
,
397 VK_SHADER_STAGE_COMPUTE_BIT
, 0, 16,
399 radv_unaligned_dispatch(cmd_buffer
, resolve_extent
->width
, resolve_extent
->height
, 1);
403 void radv_meta_resolve_compute_image(struct radv_cmd_buffer
*cmd_buffer
,
404 struct radv_image
*src_image
,
405 VkImageLayout src_image_layout
,
406 struct radv_image
*dest_image
,
407 VkImageLayout dest_image_layout
,
408 uint32_t region_count
,
409 const VkImageResolve
*regions
)
411 struct radv_meta_saved_state saved_state
;
413 radv_decompress_resolve_src(cmd_buffer
, src_image
, src_image_layout
,
414 region_count
, regions
);
416 radv_meta_save(&saved_state
, cmd_buffer
,
417 RADV_META_SAVE_COMPUTE_PIPELINE
|
418 RADV_META_SAVE_CONSTANTS
|
419 RADV_META_SAVE_DESCRIPTORS
);
421 for (uint32_t r
= 0; r
< region_count
; ++r
) {
422 const VkImageResolve
*region
= ®ions
[r
];
424 assert(region
->srcSubresource
.aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
);
425 assert(region
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
);
426 assert(region
->srcSubresource
.layerCount
== region
->dstSubresource
.layerCount
);
428 const uint32_t src_base_layer
=
429 radv_meta_get_iview_layer(src_image
, ®ion
->srcSubresource
,
432 const uint32_t dest_base_layer
=
433 radv_meta_get_iview_layer(dest_image
, ®ion
->dstSubresource
,
436 const struct VkExtent3D extent
=
437 radv_sanitize_image_extent(src_image
->type
, region
->extent
);
438 const struct VkOffset3D srcOffset
=
439 radv_sanitize_image_offset(src_image
->type
, region
->srcOffset
);
440 const struct VkOffset3D dstOffset
=
441 radv_sanitize_image_offset(dest_image
->type
, region
->dstOffset
);
443 for (uint32_t layer
= 0; layer
< region
->srcSubresource
.layerCount
;
446 struct radv_image_view src_iview
;
447 radv_image_view_init(&src_iview
, cmd_buffer
->device
,
448 &(VkImageViewCreateInfo
) {
449 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
450 .image
= radv_image_to_handle(src_image
),
451 .viewType
= radv_meta_get_view_type(src_image
),
452 .format
= src_image
->vk_format
,
453 .subresourceRange
= {
454 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
455 .baseMipLevel
= region
->srcSubresource
.mipLevel
,
457 .baseArrayLayer
= src_base_layer
+ layer
,
462 struct radv_image_view dest_iview
;
463 radv_image_view_init(&dest_iview
, cmd_buffer
->device
,
464 &(VkImageViewCreateInfo
) {
465 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
466 .image
= radv_image_to_handle(dest_image
),
467 .viewType
= radv_meta_get_view_type(dest_image
),
468 .format
= vk_to_non_srgb_format(dest_image
->vk_format
),
469 .subresourceRange
= {
470 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
471 .baseMipLevel
= region
->dstSubresource
.mipLevel
,
473 .baseArrayLayer
= dest_base_layer
+ layer
,
478 emit_resolve(cmd_buffer
,
481 &(VkOffset2D
) {srcOffset
.x
, srcOffset
.y
},
482 &(VkOffset2D
) {dstOffset
.x
, dstOffset
.y
},
483 &(VkExtent2D
) {extent
.width
, extent
.height
});
486 radv_meta_restore(&saved_state
, cmd_buffer
);
490 * Emit any needed resolves for the current subpass.
493 radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer
*cmd_buffer
)
495 struct radv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
496 const struct radv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
497 struct radv_meta_saved_state saved_state
;
498 struct radv_subpass_barrier barrier
;
500 /* Resolves happen before the end-of-subpass barriers get executed, so
501 * we have to make the attachment shader-readable.
503 barrier
.src_stage_mask
= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
;
504 barrier
.src_access_mask
= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
;
505 barrier
.dst_access_mask
= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
;
506 radv_subpass_barrier(cmd_buffer
, &barrier
);
508 radv_decompress_resolve_subpass_src(cmd_buffer
);
510 radv_meta_save(&saved_state
, cmd_buffer
,
511 RADV_META_SAVE_COMPUTE_PIPELINE
|
512 RADV_META_SAVE_CONSTANTS
|
513 RADV_META_SAVE_DESCRIPTORS
);
515 for (uint32_t i
= 0; i
< subpass
->color_count
; ++i
) {
516 struct radv_subpass_attachment src_att
= subpass
->color_attachments
[i
];
517 struct radv_subpass_attachment dest_att
= subpass
->resolve_attachments
[i
];
518 struct radv_image_view
*src_iview
= cmd_buffer
->state
.framebuffer
->attachments
[src_att
.attachment
].attachment
;
519 struct radv_image_view
*dst_iview
= cmd_buffer
->state
.framebuffer
->attachments
[dest_att
.attachment
].attachment
;
520 if (dest_att
.attachment
== VK_ATTACHMENT_UNUSED
)
523 struct radv_image
*src_image
= src_iview
->image
;
524 struct radv_image
*dst_image
= dst_iview
->image
;
525 for (uint32_t layer
= 0; layer
< src_image
->info
.array_size
; layer
++) {
527 struct radv_image_view tsrc_iview
;
528 radv_image_view_init(&tsrc_iview
, cmd_buffer
->device
,
529 &(VkImageViewCreateInfo
) {
530 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
531 .image
= radv_image_to_handle(src_image
),
532 .viewType
= radv_meta_get_view_type(src_image
),
533 .format
= src_image
->vk_format
,
534 .subresourceRange
= {
535 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
536 .baseMipLevel
= src_iview
->base_mip
,
538 .baseArrayLayer
= layer
,
543 struct radv_image_view tdst_iview
;
544 radv_image_view_init(&tdst_iview
, cmd_buffer
->device
,
545 &(VkImageViewCreateInfo
) {
546 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
547 .image
= radv_image_to_handle(dst_image
),
548 .viewType
= radv_meta_get_view_type(dst_image
),
549 .format
= vk_to_non_srgb_format(dst_image
->vk_format
),
550 .subresourceRange
= {
551 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
552 .baseMipLevel
= dst_iview
->base_mip
,
554 .baseArrayLayer
= layer
,
558 emit_resolve(cmd_buffer
,
561 &(VkOffset2D
) { 0, 0 },
562 &(VkOffset2D
) { 0, 0 },
563 &(VkExtent2D
) { fb
->width
, fb
->height
});
567 cmd_buffer
->state
.flush_bits
|= RADV_CMD_FLAG_CS_PARTIAL_FLUSH
|
568 RADV_CMD_FLAG_INV_VMEM_L1
;
570 radv_meta_restore(&saved_state
, cmd_buffer
);