#include "radv_meta.h"
#include "nir/nir_builder.h"

#include "sid.h"
#include "radv_cs.h"
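
/*
 * Meta operations on buffers: vkCmdFillBuffer, vkCmdCopyBuffer and
 * vkCmdUpdateBuffer.  Large, 4-byte-aligned ranges go through small
 * internal compute pipelines built at device init time; everything else
 * falls back to CP DMA packets or an inline WRITE_DATA packet.
 *
 * The builder below emits the fill shader: every invocation writes one
 * 16-byte vec4 of the fill value (taken from a push constant) to the
 * destination SSBO at descriptor set 0, binding 0.
 */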
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_fill");
	b.shader->info->cs.local_size[0] = 64;
	b.shader->info->cs.local_size[1] = 1;
	b.shader->info->cs.local_size[2] = 1;

	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
		b.shader->info->cs.local_size[0],
		b.shader->info->cs.local_size[1],
		b.shader->info->cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Each invocation covers 16 bytes of the destination buffer. */
	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
		nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	load->num_components = 1;
	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
	nir_builder_instr_insert(&b, &load->instr);

	/* Broadcast the 32-bit fill value to all four store components. */
	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) {0, 0, 0, 0}, 4, false);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(swizzled_load);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}
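
/*
 * Builder for the copy shader: each invocation loads one 16-byte vec4
 * from the source SSBO (set 0, binding 1) and stores it to the
 * destination SSBO (set 0, binding 0) at the same offset.
 */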
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_copy");
	b.shader->info->cs.local_size[0] = 64;
	b.shader->info->cs.local_size[1] = 1;
	b.shader->info->cs.local_size[2] = 1;

	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
		b.shader->info->cs.local_size[0],
		b.shader->info->cs.local_size[1],
		b.shader->info->cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Each invocation copies 16 bytes. */
	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
		nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
		nir_intrinsic_vulkan_resource_index);
	src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(src_buf, 0);
	nir_intrinsic_set_binding(src_buf, 1);
	nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &src_buf->instr);

	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
	load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
	load->src[1] = nir_src_for_ssa(offset);
	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
	load->num_components = 4;
	nir_builder_instr_insert(&b, &load->instr);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}
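
/*
 * One-time initialization: build both compute shaders, create the
 * descriptor set layouts (one SSBO binding for fill, two for copy), the
 * pipeline layouts (fill additionally gets a 4-byte push constant range
 * for the fill value) and the two compute pipelines.
 */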
VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module fill_cs = { .nir = NULL };
	struct radv_shader_module copy_cs = { .nir = NULL };

	zero(device->meta_state.buffer);

	fill_cs.nir = build_buffer_fill_shader(device);
	copy_cs.nir = build_buffer_copy_shader(device);

	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
		&fill_ds_create_info,
		&device->meta_state.alloc,
		&device->meta_state.buffer.fill_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
		&copy_ds_create_info,
		&device->meta_state.alloc,
		&device->meta_state.buffer.copy_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineLayoutCreateInfo fill_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
		&fill_pl_create_info,
		&device->meta_state.alloc,
		&device->meta_state.buffer.fill_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineLayoutCreateInfo copy_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
		.pushConstantRangeCount = 0,
		.pPushConstantRanges = NULL,
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
		&copy_pl_create_info,
		&device->meta_state.alloc,
		&device->meta_state.buffer.copy_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&fill_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = fill_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.fill_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
		radv_pipeline_cache_to_handle(&device->meta_state.cache),
		1, &fill_vk_pipeline_info, NULL,
		&device->meta_state.buffer.fill_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&copy_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = copy_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.copy_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
		radv_pipeline_cache_to_handle(&device->meta_state.cache),
		1, &copy_vk_pipeline_info, NULL,
		&device->meta_state.buffer.copy_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return VK_SUCCESS;
fail:
	radv_device_finish_meta_buffer_state(device);
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return result;
}
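
/* Destroy whatever subset of the meta buffer state was created. */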
void radv_device_finish_meta_buffer_state(struct radv_device *device)
{
	if (device->meta_state.buffer.copy_pipeline)
		radv_DestroyPipeline(radv_device_to_handle(device),
			device->meta_state.buffer.copy_pipeline,
			&device->meta_state.alloc);

	if (device->meta_state.buffer.fill_pipeline)
		radv_DestroyPipeline(radv_device_to_handle(device),
			device->meta_state.buffer.fill_pipeline,
			&device->meta_state.alloc);

	if (device->meta_state.buffer.copy_p_layout)
		radv_DestroyPipelineLayout(radv_device_to_handle(device),
			device->meta_state.buffer.copy_p_layout,
			&device->meta_state.alloc);

	if (device->meta_state.buffer.fill_p_layout)
		radv_DestroyPipelineLayout(radv_device_to_handle(device),
			device->meta_state.buffer.fill_p_layout,
			&device->meta_state.alloc);

	if (device->meta_state.buffer.copy_ds_layout)
		radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
			device->meta_state.buffer.copy_ds_layout,
			&device->meta_state.alloc);

	if (device->meta_state.buffer.fill_ds_layout)
		radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
			device->meta_state.buffer.fill_ds_layout,
			&device->meta_state.alloc);
}
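
/*
 * Record a fill through the compute pipeline.  The shader writes 16
 * bytes per invocation and the workgroup size is 64, so one workgroup
 * covers 1024 bytes; block_count = round_up_u64(size, 1024) dispatches
 * enough groups for the whole range.
 */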
static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
	struct radeon_winsys_bo *bo,
	uint64_t offset, uint64_t size, uint32_t value)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_compute_state saved_state;
	VkDescriptorSet ds;

	radv_meta_save_compute(&saved_state, cmd_buffer, 4);

	radv_temp_descriptor_set_create(device, cmd_buffer,
		device->meta_state.buffer.fill_ds_layout,
		&ds);

	struct radv_buffer dst_buffer = {
		.bo = bo,
		.offset = offset,
		.size = size
	};

	radv_UpdateDescriptorSets(radv_device_to_handle(device),
		1, /* descriptorWriteCount */
		(VkWriteDescriptorSet[]) {
			{
				.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
				.dstSet = ds,
				.dstBinding = 0,
				.dstArrayElement = 0,
				.descriptorCount = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.pBufferInfo = &(VkDescriptorBufferInfo) {
					.buffer = radv_buffer_to_handle(&dst_buffer),
					.offset = 0,
					.range = size
				}
			}
		}, 0, NULL);

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
		VK_PIPELINE_BIND_POINT_COMPUTE,
		device->meta_state.buffer.fill_pipeline);

	radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
		VK_PIPELINE_BIND_POINT_COMPUTE,
		device->meta_state.buffer.fill_p_layout, 0, 1,
		&ds, 0, NULL);

	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
		device->meta_state.buffer.fill_p_layout,
		VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
		&value);

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_temp_descriptor_set_destroy(device, ds);

	radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
}
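
/*
 * Record a copy through the compute pipeline.  Same dispatch math as the
 * fill path: 16 bytes per invocation, 1024 bytes per workgroup.  The
 * temporary descriptor set binds the destination buffer at binding 0 and
 * the source buffer at binding 1, matching the copy shader above.
 */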
static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
	struct radeon_winsys_bo *src_bo,
	struct radeon_winsys_bo *dst_bo,
	uint64_t src_offset, uint64_t dst_offset,
	uint64_t size)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_compute_state saved_state;
	VkDescriptorSet ds;

	radv_meta_save_compute(&saved_state, cmd_buffer, 0);

	radv_temp_descriptor_set_create(device, cmd_buffer,
		device->meta_state.buffer.copy_ds_layout,
		&ds);

	struct radv_buffer dst_buffer = {
		.bo = dst_bo,
		.offset = dst_offset,
		.size = size
	};

	struct radv_buffer src_buffer = {
		.bo = src_bo,
		.offset = src_offset,
		.size = size
	};

	radv_UpdateDescriptorSets(radv_device_to_handle(device),
		2, /* descriptorWriteCount */
		(VkWriteDescriptorSet[]) {
			{
				.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
				.dstSet = ds,
				.dstBinding = 0,
				.dstArrayElement = 0,
				.descriptorCount = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.pBufferInfo = &(VkDescriptorBufferInfo) {
					.buffer = radv_buffer_to_handle(&dst_buffer),
					.offset = 0,
					.range = size
				}
			},
			{
				.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
				.dstSet = ds,
				.dstBinding = 1,
				.dstArrayElement = 0,
				.descriptorCount = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.pBufferInfo = &(VkDescriptorBufferInfo) {
					.buffer = radv_buffer_to_handle(&src_buffer),
					.offset = 0,
					.range = size
				}
			}
		}, 0, NULL);

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
		VK_PIPELINE_BIND_POINT_COMPUTE,
		device->meta_state.buffer.copy_pipeline);

	radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
		VK_PIPELINE_BIND_POINT_COMPUTE,
		device->meta_state.buffer.copy_p_layout, 0, 1,
		&ds, 0, NULL);

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_temp_descriptor_set_destroy(device, ds);

	radv_meta_restore_compute(&saved_state, cmd_buffer, 0);
}
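
/*
 * Fill entry point used by the driver: large fills take the compute path
 * above (using the same 4096-byte cutoff as the copy path below), small
 * non-empty fills use the CP DMA clear packet.
 */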
void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
	struct radeon_winsys_bo *bo,
	uint64_t offset, uint64_t size, uint32_t value)
{
	assert(!(offset & 3));
	assert(!(size & 3));

	if (size >= 4096)
		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
	else if (size) {
		uint64_t va = cmd_buffer->device->ws->buffer_get_va(bo);
		va += offset;
		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, bo, 8);
		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
	}
}
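
/*
 * Copy entry point: sizes of at least 4096 bytes with 4-byte-aligned
 * size and offsets take the compute path, everything else non-empty is
 * handled by a CP DMA buffer copy.
 */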
void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
	struct radeon_winsys_bo *src_bo,
	struct radeon_winsys_bo *dst_bo,
	uint64_t src_offset, uint64_t dst_offset,
	uint64_t size)
{
	if (size >= 4096 && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
			src_offset, dst_offset, size);
	else if (size) {
		uint64_t src_va = cmd_buffer->device->ws->buffer_get_va(src_bo);
		uint64_t dst_va = cmd_buffer->device->ws->buffer_get_va(dst_bo);
		src_va += src_offset;
		dst_va += dst_offset;

		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, src_bo, 8);
		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_bo, 8);

		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
	}
}
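
/*
 * vkCmdFillBuffer: clamp VK_WHOLE_SIZE to the remaining buffer size
 * (rounded down to a multiple of 4) and forward to radv_fill_buffer.
 */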
void radv_CmdFillBuffer(
	VkCommandBuffer commandBuffer,
	VkBuffer dstBuffer,
	VkDeviceSize dstOffset,
	VkDeviceSize fillSize,
	uint32_t data)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);

	if (fillSize == VK_WHOLE_SIZE)
		fillSize = (dst_buffer->size - dstOffset) & ~3ull;

	radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
		fillSize, data);
}
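
/* vkCmdCopyBuffer: forward each VkBufferCopy region to radv_copy_buffer. */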
void radv_CmdCopyBuffer(
	VkCommandBuffer commandBuffer,
	VkBuffer srcBuffer,
	VkBuffer destBuffer,
	uint32_t regionCount,
	const VkBufferCopy *pRegions)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
	RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);

	for (unsigned r = 0; r < regionCount; r++) {
		uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
		uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
		uint64_t copy_size = pRegions[r].size;

		radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
			src_offset, dest_offset, copy_size);
	}
}
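
/*
 * vkCmdUpdateBuffer: small updates (< 4096 bytes) are written inline
 * into the command stream with a PKT3_WRITE_DATA packet; larger ones are
 * staged in the upload BO and copied with radv_copy_buffer.
 */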
void radv_CmdUpdateBuffer(
	VkCommandBuffer commandBuffer,
	VkBuffer dstBuffer,
	VkDeviceSize dstOffset,
	VkDeviceSize dataSize,
	const uint32_t *pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	uint64_t words = dataSize / 4;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
	va += dstOffset + dst_buffer->offset;

	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (dataSize < 4096) {
		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);

		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
			S_370_WR_CONFIRM(1) |
			S_370_ENGINE_SEL(V_370_ME));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit_array(cmd_buffer->cs, pData, words);
	} else {
		uint32_t buf_offset;
		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
			buf_offset, dstOffset + dst_buffer->offset, dataSize);
	}
}