2 #include "nir/nir_builder.h"
/*
 * Builds the NIR compute shader used to fill a buffer with a 32-bit value.
 *
 * The shader is named "meta_buffer_fill" and uses a 64x1x1 local size.
 * Each invocation loads one 32-bit push constant ("fill_value"), replicates
 * it into a 4-component vector and stores that vector to the storage buffer
 * at descriptor set 0, binding 0, at offset global_id.x * 16.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (return type, declaration of `b`, braces, return statement) - confirm
 * against the original file before relying on it.
 */
8 build_buffer_fill_shader(struct radv_device
*dev
)
/* Create an empty compute shader and set its name and workgroup size. */
12 nir_builder_init_simple_shader(&b
, NULL
, MESA_SHADER_COMPUTE
, NULL
);
13 b
.shader
->info
.name
= ralloc_strdup(b
.shader
, "meta_buffer_fill");
14 b
.shader
->info
.cs
.local_size
[0] = 64;
15 b
.shader
->info
.cs
.local_size
[1] = 1;
16 b
.shader
->info
.cs
.local_size
[2] = 1;
/* global_id = work_group_id * local_size + local_invocation_id */
18 nir_ssa_def
*invoc_id
= nir_load_system_value(&b
, nir_intrinsic_load_local_invocation_id
, 0);
19 nir_ssa_def
*wg_id
= nir_load_system_value(&b
, nir_intrinsic_load_work_group_id
, 0);
20 nir_ssa_def
*block_size
= nir_imm_ivec4(&b
,
21 b
.shader
->info
.cs
.local_size
[0],
22 b
.shader
->info
.cs
.local_size
[1],
23 b
.shader
->info
.cs
.local_size
[2], 0);
25 nir_ssa_def
*global_id
= nir_iadd(&b
, nir_imul(&b
, wg_id
, block_size
), invoc_id
);
/*
 * Scale the id by 16 (the store below writes 4 components of 32 bits)
 * and keep only component 0 as the scalar store offset.
 */
27 nir_ssa_def
*offset
= nir_imul(&b
, global_id
, nir_imm_int(&b
, 16));
28 offset
= nir_swizzle(&b
, offset
, (unsigned[]) {0, 0, 0, 0}, 1, false);
/* Resource index of the destination buffer: set 0, binding 0. */
30 nir_intrinsic_instr
*dst_buf
= nir_intrinsic_instr_create(b
.shader
,
31 nir_intrinsic_vulkan_resource_index
);
32 dst_buf
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
33 nir_intrinsic_set_desc_set(dst_buf
, 0);
34 nir_intrinsic_set_binding(dst_buf
, 0);
35 nir_ssa_dest_init(&dst_buf
->instr
, &dst_buf
->dest
, 1, 32, NULL
);
36 nir_builder_instr_insert(&b
, &dst_buf
->instr
);
/* Load the single 32-bit fill value from push constants (base 0, range 4). */
38 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
39 nir_intrinsic_set_base(load
, 0);
40 nir_intrinsic_set_range(load
, 4);
41 load
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
42 load
->num_components
= 1;
43 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 1, 32, "fill_value");
44 nir_builder_instr_insert(&b
, &load
->instr
);
/* Replicate the scalar fill value into all 4 components. */
46 nir_ssa_def
*swizzled_load
= nir_swizzle(&b
, &load
->dest
.ssa
, (unsigned[]) { 0, 0, 0, 0}, 4, false);
/* Store the 4-component value to the destination buffer at `offset`. */
48 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_store_ssbo
);
49 store
->src
[0] = nir_src_for_ssa(swizzled_load
);
50 store
->src
[1] = nir_src_for_ssa(&dst_buf
->dest
.ssa
);
51 store
->src
[2] = nir_src_for_ssa(offset
);
52 nir_intrinsic_set_write_mask(store
, 0xf);
53 store
->num_components
= 4;
54 nir_builder_instr_insert(&b
, &store
->instr
);
/*
 * Builds the NIR compute shader used to copy between two buffers.
 *
 * The shader is named "meta_buffer_copy" and uses a 64x1x1 local size.
 * Each invocation loads a 4-component, 32-bit-per-component vector from the
 * storage buffer at set 0, binding 1 (source) and stores it to the storage
 * buffer at set 0, binding 0 (destination), both at offset global_id.x * 16.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (return type, declaration of `b`, braces, return statement) - confirm
 * against the original file before relying on it.
 */
60 build_buffer_copy_shader(struct radv_device
*dev
)
/* Create an empty compute shader and set its name and workgroup size. */
64 nir_builder_init_simple_shader(&b
, NULL
, MESA_SHADER_COMPUTE
, NULL
);
65 b
.shader
->info
.name
= ralloc_strdup(b
.shader
, "meta_buffer_copy");
66 b
.shader
->info
.cs
.local_size
[0] = 64;
67 b
.shader
->info
.cs
.local_size
[1] = 1;
68 b
.shader
->info
.cs
.local_size
[2] = 1;
/* global_id = work_group_id * local_size + local_invocation_id */
70 nir_ssa_def
*invoc_id
= nir_load_system_value(&b
, nir_intrinsic_load_local_invocation_id
, 0);
71 nir_ssa_def
*wg_id
= nir_load_system_value(&b
, nir_intrinsic_load_work_group_id
, 0);
72 nir_ssa_def
*block_size
= nir_imm_ivec4(&b
,
73 b
.shader
->info
.cs
.local_size
[0],
74 b
.shader
->info
.cs
.local_size
[1],
75 b
.shader
->info
.cs
.local_size
[2], 0);
77 nir_ssa_def
*global_id
= nir_iadd(&b
, nir_imul(&b
, wg_id
, block_size
), invoc_id
);
/*
 * Scale the id by 16 (each invocation moves 4 components of 32 bits)
 * and keep only component 0 as the scalar load/store offset.
 */
79 nir_ssa_def
*offset
= nir_imul(&b
, global_id
, nir_imm_int(&b
, 16));
80 offset
= nir_swizzle(&b
, offset
, (unsigned[]) {0, 0, 0, 0}, 1, false);
/* Resource index of the destination buffer: set 0, binding 0. */
82 nir_intrinsic_instr
*dst_buf
= nir_intrinsic_instr_create(b
.shader
,
83 nir_intrinsic_vulkan_resource_index
);
84 dst_buf
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
85 nir_intrinsic_set_desc_set(dst_buf
, 0);
86 nir_intrinsic_set_binding(dst_buf
, 0);
87 nir_ssa_dest_init(&dst_buf
->instr
, &dst_buf
->dest
, 1, 32, NULL
);
88 nir_builder_instr_insert(&b
, &dst_buf
->instr
);
/* Resource index of the source buffer: set 0, binding 1. */
90 nir_intrinsic_instr
*src_buf
= nir_intrinsic_instr_create(b
.shader
,
91 nir_intrinsic_vulkan_resource_index
);
92 src_buf
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
93 nir_intrinsic_set_desc_set(src_buf
, 0);
94 nir_intrinsic_set_binding(src_buf
, 1);
95 nir_ssa_dest_init(&src_buf
->instr
, &src_buf
->dest
, 1, 32, NULL
);
96 nir_builder_instr_insert(&b
, &src_buf
->instr
);
/* Load 4 x 32-bit components from the source buffer at `offset`. */
98 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_ssbo
);
99 load
->src
[0] = nir_src_for_ssa(&src_buf
->dest
.ssa
);
100 load
->src
[1] = nir_src_for_ssa(offset
);
101 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, 32, NULL
);
102 load
->num_components
= 4;
103 nir_builder_instr_insert(&b
, &load
->instr
);
/* Store the loaded vector to the destination buffer at the same offset. */
105 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_store_ssbo
);
106 store
->src
[0] = nir_src_for_ssa(&load
->dest
.ssa
);
107 store
->src
[1] = nir_src_for_ssa(&dst_buf
->dest
.ssa
);
108 store
->src
[2] = nir_src_for_ssa(offset
);
109 nir_intrinsic_set_write_mask(store
, 0xf);
110 store
->num_components
= 4;
111 nir_builder_instr_insert(&b
, &store
->instr
);
/*
 * Creates the device-wide objects used by the buffer fill and copy compute
 * paths and stores them in device->meta_state.buffer:
 *   - fill_ds_layout / copy_ds_layout  (push-descriptor set layouts)
 *   - fill_p_layout  / copy_p_layout   (pipeline layouts)
 *   - fill_pipeline  / copy_pipeline   (compute pipelines)
 *
 * The fill objects use one storage-buffer binding and a 4-byte compute
 * push-constant range; the copy objects use two storage-buffer bindings and
 * no push constants.  Each creation result is compared against VK_SUCCESS.
 *
 * Review fix: the four arguments that read "©_..." in this listing were a
 * mis-decoded "&copy_..." and are restored to the address-of expressions on
 * the copy_* locals declared in this function.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (declaration of `result`, braces, some initializer fields, the bodies of
 * the `if (result != VK_SUCCESS)` checks and the return statements) -
 * confirm against the original file before relying on it.
 */
118 VkResult
radv_device_init_meta_buffer_state(struct radv_device
*device
)
/* Wrap the two generated NIR shaders in shader-module structs. */
121 struct radv_shader_module fill_cs
= { .nir
= NULL
};
122 struct radv_shader_module copy_cs
= { .nir
= NULL
};
124 fill_cs
.nir
= build_buffer_fill_shader(device
);
125 copy_cs
.nir
= build_buffer_copy_shader(device
);
/* Fill descriptor set layout: one compute-stage storage buffer. */
127 VkDescriptorSetLayoutCreateInfo fill_ds_create_info
= {
128 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO
,
129 .flags
= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR
,
131 .pBindings
= (VkDescriptorSetLayoutBinding
[]) {
134 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
135 .descriptorCount
= 1,
136 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
137 .pImmutableSamplers
= NULL
142 result
= radv_CreateDescriptorSetLayout(radv_device_to_handle(device
),
143 &fill_ds_create_info
,
144 &device
->meta_state
.alloc
,
145 &device
->meta_state
.buffer
.fill_ds_layout
);
146 if (result
!= VK_SUCCESS
)
/* Copy descriptor set layout: two compute-stage storage buffers. */
149 VkDescriptorSetLayoutCreateInfo copy_ds_create_info
= {
150 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO
,
151 .flags
= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR
,
153 .pBindings
= (VkDescriptorSetLayoutBinding
[]) {
156 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
157 .descriptorCount
= 1,
158 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
159 .pImmutableSamplers
= NULL
163 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
164 .descriptorCount
= 1,
165 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
166 .pImmutableSamplers
= NULL
171 result
= radv_CreateDescriptorSetLayout(radv_device_to_handle(device
),
172 &copy_ds_create_info
,
173 &device
->meta_state
.alloc
,
174 &device
->meta_state
.buffer
.copy_ds_layout
);
175 if (result
!= VK_SUCCESS
)
/* Fill pipeline layout: fill set layout plus a 4-byte push-constant range. */
179 VkPipelineLayoutCreateInfo fill_pl_create_info
= {
180 .sType
= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
,
182 .pSetLayouts
= &device
->meta_state
.buffer
.fill_ds_layout
,
183 .pushConstantRangeCount
= 1,
184 .pPushConstantRanges
= &(VkPushConstantRange
){VK_SHADER_STAGE_COMPUTE_BIT
, 0, 4},
187 result
= radv_CreatePipelineLayout(radv_device_to_handle(device
),
188 &fill_pl_create_info
,
189 &device
->meta_state
.alloc
,
190 &device
->meta_state
.buffer
.fill_p_layout
);
191 if (result
!= VK_SUCCESS
)
/* Copy pipeline layout: copy set layout, no push constants. */
194 VkPipelineLayoutCreateInfo copy_pl_create_info
= {
195 .sType
= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
,
197 .pSetLayouts
= &device
->meta_state
.buffer
.copy_ds_layout
,
198 .pushConstantRangeCount
= 0,
201 result
= radv_CreatePipelineLayout(radv_device_to_handle(device
),
202 &copy_pl_create_info
,
203 &device
->meta_state
.alloc
,
204 &device
->meta_state
.buffer
.copy_p_layout
);
205 if (result
!= VK_SUCCESS
)
/* Fill compute pipeline, built from fill_cs with the fill pipeline layout. */
208 VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage
= {
209 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
210 .stage
= VK_SHADER_STAGE_COMPUTE_BIT
,
211 .module
= radv_shader_module_to_handle(&fill_cs
),
213 .pSpecializationInfo
= NULL
,
216 VkComputePipelineCreateInfo fill_vk_pipeline_info
= {
217 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
218 .stage
= fill_pipeline_shader_stage
,
220 .layout
= device
->meta_state
.buffer
.fill_p_layout
,
223 result
= radv_CreateComputePipelines(radv_device_to_handle(device
),
224 radv_pipeline_cache_to_handle(&device
->meta_state
.cache
),
225 1, &fill_vk_pipeline_info
, NULL
,
226 &device
->meta_state
.buffer
.fill_pipeline
);
227 if (result
!= VK_SUCCESS
)
/* Copy compute pipeline, built from copy_cs with the copy pipeline layout. */
230 VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage
= {
231 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
232 .stage
= VK_SHADER_STAGE_COMPUTE_BIT
,
233 .module
= radv_shader_module_to_handle(&copy_cs
),
235 .pSpecializationInfo
= NULL
,
238 VkComputePipelineCreateInfo copy_vk_pipeline_info
= {
239 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
240 .stage
= copy_pipeline_shader_stage
,
242 .layout
= device
->meta_state
.buffer
.copy_p_layout
,
245 result
= radv_CreateComputePipelines(radv_device_to_handle(device
),
246 radv_pipeline_cache_to_handle(&device
->meta_state
.cache
),
247 1, &copy_vk_pipeline_info
, NULL
,
248 &device
->meta_state
.buffer
.copy_pipeline
);
249 if (result
!= VK_SUCCESS
)
/* The NIR shaders are released once both pipelines have been created. */
252 ralloc_free(fill_cs
.nir
);
253 ralloc_free(copy_cs
.nir
);
/*
 * NOTE(review): presumably the failure path (its label is not visible in
 * this listing): tear down whatever was created, then release the shaders.
 */
256 radv_device_finish_meta_buffer_state(device
);
257 ralloc_free(fill_cs
.nir
);
258 ralloc_free(copy_cs
.nir
);
/*
 * Destroys the buffer meta objects stored in device->meta_state.buffer.
 *
 * Each handle is checked for being non-zero before it is destroyed, so the
 * function can be called when only some of the objects exist.  Objects are
 * released in this order: pipelines, pipeline layouts, descriptor set
 * layouts (copy before fill in each pair).
 *
 * NOTE(review): braces appear to have been lost during extraction of this
 * listing - confirm against the original file.
 */
262 void radv_device_finish_meta_buffer_state(struct radv_device
*device
)
/* Compute pipelines. */
264 if (device
->meta_state
.buffer
.copy_pipeline
)
265 radv_DestroyPipeline(radv_device_to_handle(device
),
266 device
->meta_state
.buffer
.copy_pipeline
,
267 &device
->meta_state
.alloc
);
269 if (device
->meta_state
.buffer
.fill_pipeline
)
270 radv_DestroyPipeline(radv_device_to_handle(device
),
271 device
->meta_state
.buffer
.fill_pipeline
,
272 &device
->meta_state
.alloc
);
/* Pipeline layouts. */
274 if (device
->meta_state
.buffer
.copy_p_layout
)
275 radv_DestroyPipelineLayout(radv_device_to_handle(device
),
276 device
->meta_state
.buffer
.copy_p_layout
,
277 &device
->meta_state
.alloc
);
279 if (device
->meta_state
.buffer
.fill_p_layout
)
280 radv_DestroyPipelineLayout(radv_device_to_handle(device
),
281 device
->meta_state
.buffer
.fill_p_layout
,
282 &device
->meta_state
.alloc
);
/* Descriptor set layouts. */
284 if (device
->meta_state
.buffer
.copy_ds_layout
)
285 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device
),
286 device
->meta_state
.buffer
.copy_ds_layout
,
287 &device
->meta_state
.alloc
);
289 if (device
->meta_state
.buffer
.fill_ds_layout
)
290 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device
),
291 device
->meta_state
.buffer
.fill_ds_layout
,
292 &device
->meta_state
.alloc
);
/*
 * Fills a buffer range using the fill compute pipeline.
 *
 * Saves the compute state, binds the fill pipeline, pushes one storage
 * buffer descriptor for a temporary `dst_buffer`, pushes 4 bytes of
 * compute-stage push constants, dispatches `block_count` x 1 x 1
 * workgroups and restores the saved compute state.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (the dst_buffer initializer fields, several descriptor-write fields, the
 * data argument of the push-constant call, braces) - confirm against the
 * original file.
 */
295 static void fill_buffer_shader(struct radv_cmd_buffer
*cmd_buffer
,
296 struct radeon_winsys_bo
*bo
,
297 uint64_t offset
, uint64_t size
, uint32_t value
)
299 struct radv_device
*device
= cmd_buffer
->device
;
/*
 * 1024 equals 64 invocations per workgroup times the factor 16 applied to
 * each invocation's offset in build_buffer_fill_shader.
 * NOTE(review): round_up_u64 is defined elsewhere; presumably a rounding-up
 * division giving the number of workgroups - confirm.
 */
300 uint64_t block_count
= round_up_u64(size
, 1024);
301 struct radv_meta_saved_compute_state saved_state
;
/*
 * NOTE(review): the third argument (4) presumably is the number of
 * push-constant bytes to save; it matches the 4 bytes pushed below - confirm.
 */
303 radv_meta_save_compute(&saved_state
, cmd_buffer
, 4);
/* Temporary buffer object handed to the descriptor write below. */
305 struct radv_buffer dst_buffer
= {
311 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer
),
312 VK_PIPELINE_BIND_POINT_COMPUTE
,
313 device
->meta_state
.buffer
.fill_pipeline
);
/* Push a single storage-buffer descriptor referring to dst_buffer. */
315 radv_meta_push_descriptor_set(cmd_buffer
, VK_PIPELINE_BIND_POINT_COMPUTE
,
316 device
->meta_state
.buffer
.fill_p_layout
,
318 1, /* descriptorWriteCount */
319 (VkWriteDescriptorSet
[]) {
321 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
323 .dstArrayElement
= 0,
324 .descriptorCount
= 1,
325 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
326 .pBufferInfo
= &(VkDescriptorBufferInfo
) {
327 .buffer
= radv_buffer_to_handle(&dst_buffer
),
/* Push 4 bytes of compute-stage push constants at offset 0. */
334 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer
),
335 device
->meta_state
.buffer
.fill_p_layout
,
336 VK_SHADER_STAGE_COMPUTE_BIT
, 0, 4,
339 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer
), block_count
, 1, 1);
341 radv_meta_restore_compute(&saved_state
, cmd_buffer
);
/*
 * Copies between two buffer ranges using the copy compute pipeline.
 *
 * Saves the compute state, binds the copy pipeline, pushes two storage
 * buffer descriptors (one for a temporary `dst_buffer`, one for a temporary
 * `src_buffer`), dispatches `block_count` x 1 x 1 workgroups and restores
 * the saved compute state.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (the trailing `size` parameter, most buffer initializer fields, several
 * descriptor-write fields, braces) - confirm against the original file.
 */
344 static void copy_buffer_shader(struct radv_cmd_buffer
*cmd_buffer
,
345 struct radeon_winsys_bo
*src_bo
,
346 struct radeon_winsys_bo
*dst_bo
,
347 uint64_t src_offset
, uint64_t dst_offset
,
350 struct radv_device
*device
= cmd_buffer
->device
;
/*
 * 1024 equals 64 invocations per workgroup times the factor 16 applied to
 * each invocation's offset in build_buffer_copy_shader.
 * NOTE(review): round_up_u64 is defined elsewhere; presumably a rounding-up
 * division giving the number of workgroups - confirm.
 */
351 uint64_t block_count
= round_up_u64(size
, 1024);
352 struct radv_meta_saved_compute_state saved_state
;
/* Third argument is 0 here; the copy pipeline layout has no push constants. */
354 radv_meta_save_compute(&saved_state
, cmd_buffer
, 0);
/* Temporary buffer objects handed to the descriptor writes below. */
356 struct radv_buffer dst_buffer
= {
358 .offset
= dst_offset
,
362 struct radv_buffer src_buffer
= {
364 .offset
= src_offset
,
368 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer
),
369 VK_PIPELINE_BIND_POINT_COMPUTE
,
370 device
->meta_state
.buffer
.copy_pipeline
);
/* Push two storage-buffer descriptors: dst_buffer first, then src_buffer. */
372 radv_meta_push_descriptor_set(cmd_buffer
, VK_PIPELINE_BIND_POINT_COMPUTE
,
373 device
->meta_state
.buffer
.copy_p_layout
,
375 2, /* descriptorWriteCount */
376 (VkWriteDescriptorSet
[]) {
378 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
380 .dstArrayElement
= 0,
381 .descriptorCount
= 1,
382 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
383 .pBufferInfo
= &(VkDescriptorBufferInfo
) {
384 .buffer
= radv_buffer_to_handle(&dst_buffer
),
390 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
392 .dstArrayElement
= 0,
393 .descriptorCount
= 1,
394 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
395 .pBufferInfo
= &(VkDescriptorBufferInfo
) {
396 .buffer
= radv_buffer_to_handle(&src_buffer
),
403 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer
), block_count
, 1, 1);
405 radv_meta_restore_compute(&saved_state
, cmd_buffer
);
/*
 * Fills `size` bytes of `bo`, starting at `offset`, with the 32-bit `value`.
 *
 * Two mechanisms are visible: the compute path (fill_buffer_shader) and a
 * CP DMA clear (si_cp_dma_clear_buffer) on the buffer's virtual address.
 * `offset` is asserted to be a multiple of 4.
 *
 * NOTE(review): this listing appears to have lost lines during extraction;
 * the condition that selects between the two paths and any adjustment of
 * `va` by `offset` are not visible - confirm against the original file.
 */
409 void radv_fill_buffer(struct radv_cmd_buffer
*cmd_buffer
,
410 struct radeon_winsys_bo
*bo
,
411 uint64_t offset
, uint64_t size
, uint32_t value
)
413 assert(!(offset
& 3));
/* Compute-shader fill path. */
417 fill_buffer_shader(cmd_buffer
, bo
, offset
, size
, value
);
/* CP DMA path: register the BO with the command stream, then clear. */
419 uint64_t va
= radv_buffer_get_va(bo
);
421 cmd_buffer
->device
->ws
->cs_add_buffer(cmd_buffer
->cs
, bo
, 8);
422 si_cp_dma_clear_buffer(cmd_buffer
, va
, size
, value
);
/*
 * Copies `size` bytes from `src_bo` + `src_offset` to `dst_bo` + `dst_offset`.
 *
 * The compute path (copy_buffer_shader) is taken when size is at least 4096
 * and size and both offsets are multiples of 4.  A CP DMA copy on the two
 * virtual addresses is also present.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (the trailing `size` parameter, braces and presumably the `else` that
 * makes the CP DMA copy the fallback) - confirm against the original file.
 */
427 void radv_copy_buffer(struct radv_cmd_buffer
*cmd_buffer
,
428 struct radeon_winsys_bo
*src_bo
,
429 struct radeon_winsys_bo
*dst_bo
,
430 uint64_t src_offset
, uint64_t dst_offset
,
/* Large copies with 4-byte-aligned size and offsets use the compute shader. */
433 if (size
>= 4096 && !(size
& 3) && !(src_offset
& 3) && !(dst_offset
& 3))
434 copy_buffer_shader(cmd_buffer
, src_bo
, dst_bo
,
435 src_offset
, dst_offset
, size
);
/* CP DMA path: compute both virtual addresses including the offsets. */
437 uint64_t src_va
= radv_buffer_get_va(src_bo
);
438 uint64_t dst_va
= radv_buffer_get_va(dst_bo
);
439 src_va
+= src_offset
;
440 dst_va
+= dst_offset
;
/* Register both BOs with the command stream before emitting the copy. */
442 cmd_buffer
->device
->ws
->cs_add_buffer(cmd_buffer
->cs
, src_bo
, 8);
443 cmd_buffer
->device
->ws
->cs_add_buffer(cmd_buffer
->cs
, dst_bo
, 8);
445 si_cp_dma_buffer_copy(cmd_buffer
, src_va
, dst_va
, size
);
/*
 * Fill-buffer command entry point: resolves the handles and forwards to
 * radv_fill_buffer() on the destination buffer's BO at
 * dst_buffer->offset + dstOffset.
 *
 * When fillSize is VK_WHOLE_SIZE it is replaced by the remaining buffer
 * size after dstOffset, rounded down to a multiple of 4.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (the dstBuffer and data parameters, the remaining radv_fill_buffer
 * arguments, braces) - confirm against the original file.
 */
449 void radv_CmdFillBuffer(
450 VkCommandBuffer commandBuffer
,
452 VkDeviceSize dstOffset
,
453 VkDeviceSize fillSize
,
456 RADV_FROM_HANDLE(radv_cmd_buffer
, cmd_buffer
, commandBuffer
);
457 RADV_FROM_HANDLE(radv_buffer
, dst_buffer
, dstBuffer
);
/* "& ~3ull" clears the two low bits, i.e. rounds down to a multiple of 4. */
459 if (fillSize
== VK_WHOLE_SIZE
)
460 fillSize
= (dst_buffer
->size
- dstOffset
) & ~3ull;
462 radv_fill_buffer(cmd_buffer
, dst_buffer
->bo
, dst_buffer
->offset
+ dstOffset
,
/*
 * Copy-buffer command entry point: resolves the handles and issues one
 * radv_copy_buffer() call per region in pRegions[0 .. regionCount-1].
 *
 * For each region the source and destination offsets are the region's
 * srcOffset/dstOffset added to the respective buffer's own offset.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (the srcBuffer and destBuffer parameters, braces) - confirm against the
 * original file.
 */
466 void radv_CmdCopyBuffer(
467 VkCommandBuffer commandBuffer
,
470 uint32_t regionCount
,
471 const VkBufferCopy
* pRegions
)
473 RADV_FROM_HANDLE(radv_cmd_buffer
, cmd_buffer
, commandBuffer
);
474 RADV_FROM_HANDLE(radv_buffer
, src_buffer
, srcBuffer
);
475 RADV_FROM_HANDLE(radv_buffer
, dest_buffer
, destBuffer
);
/* One copy per region. */
477 for (unsigned r
= 0; r
< regionCount
; r
++) {
478 uint64_t src_offset
= src_buffer
->offset
+ pRegions
[r
].srcOffset
;
479 uint64_t dest_offset
= dest_buffer
->offset
+ pRegions
[r
].dstOffset
;
480 uint64_t copy_size
= pRegions
[r
].size
;
482 radv_copy_buffer(cmd_buffer
, src_buffer
->bo
, dest_buffer
->bo
,
483 src_offset
, dest_offset
, copy_size
);
/*
 * Update-buffer command entry point: writes dataSize bytes from pData into
 * the destination buffer at dstOffset.
 *
 * dataSize is asserted to be a multiple of 4.  When dataSize is below 4096
 * the data is embedded in the command stream with a WRITE_DATA packet
 * addressed at the destination virtual address.  The listing also contains
 * a path that uploads the data via radv_cmd_buffer_upload_data() and copies
 * it from the upload BO with radv_copy_buffer().
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (the dstBuffer and pData parameters, the declaration of `buf_offset`,
 * braces and presumably the `else` introducing the upload path) - confirm
 * against the original file.
 */
487 void radv_CmdUpdateBuffer(
488 VkCommandBuffer commandBuffer
,
490 VkDeviceSize dstOffset
,
491 VkDeviceSize dataSize
,
494 RADV_FROM_HANDLE(radv_cmd_buffer
, cmd_buffer
, commandBuffer
);
495 RADV_FROM_HANDLE(radv_buffer
, dst_buffer
, dstBuffer
);
496 bool mec
= radv_cmd_buffer_uses_mec(cmd_buffer
);
/* Payload length in 32-bit words. */
497 uint64_t words
= dataSize
/ 4;
/* Destination address: BO address plus both offsets. */
498 uint64_t va
= radv_buffer_get_va(dst_buffer
->bo
);
499 va
+= dstOffset
+ dst_buffer
->offset
;
501 assert(!(dataSize
& 3));
/* Small updates are written inline through the command stream. */
507 if (dataSize
< 4096) {
508 si_emit_cache_flush(cmd_buffer
);
510 cmd_buffer
->device
->ws
->cs_add_buffer(cmd_buffer
->cs
, dst_buffer
->bo
, 8);
/* Reserve room for the 4 dwords emitted below plus `words` payload dwords. */
512 radeon_check_space(cmd_buffer
->device
->ws
, cmd_buffer
->cs
, words
+ 4);
/* Packet header, then control dword (destination select depends on `mec`). */
514 radeon_emit(cmd_buffer
->cs
, PKT3(PKT3_WRITE_DATA
, 2 + words
, 0));
515 radeon_emit(cmd_buffer
->cs
, S_370_DST_SEL(mec
?
516 V_370_MEM_ASYNC
: V_370_MEMORY_SYNC
) |
517 S_370_WR_CONFIRM(1) |
518 S_370_ENGINE_SEL(V_370_ME
));
/* Destination address: first `va`, then `va >> 32`; then the payload. */
519 radeon_emit(cmd_buffer
->cs
, va
);
520 radeon_emit(cmd_buffer
->cs
, va
>> 32);
521 radeon_emit_array(cmd_buffer
->cs
, pData
, words
);
523 radv_cmd_buffer_trace_emit(cmd_buffer
);
/* Upload path: stage the data in the upload BO, then copy it to the target. */
526 radv_cmd_buffer_upload_data(cmd_buffer
, dataSize
, 32, pData
, &buf_offset
);
527 radv_copy_buffer(cmd_buffer
, cmd_buffer
->upload
.upload_bo
, dst_buffer
->bo
,
528 buf_offset
, dstOffset
+ dst_buffer
->offset
, dataSize
);