2 #include "nir/nir_builder.h"
8 build_buffer_fill_shader(struct radv_device
*dev
)
12 nir_builder_init_simple_shader(&b
, NULL
, MESA_SHADER_COMPUTE
, NULL
);
13 b
.shader
->info
.name
= ralloc_strdup(b
.shader
, "meta_buffer_fill");
14 b
.shader
->info
.cs
.local_size
[0] = 64;
15 b
.shader
->info
.cs
.local_size
[1] = 1;
16 b
.shader
->info
.cs
.local_size
[2] = 1;
18 nir_ssa_def
*invoc_id
= nir_load_system_value(&b
, nir_intrinsic_load_local_invocation_id
, 0);
19 nir_ssa_def
*wg_id
= nir_load_system_value(&b
, nir_intrinsic_load_work_group_id
, 0);
20 nir_ssa_def
*block_size
= nir_imm_ivec4(&b
,
21 b
.shader
->info
.cs
.local_size
[0],
22 b
.shader
->info
.cs
.local_size
[1],
23 b
.shader
->info
.cs
.local_size
[2], 0);
25 nir_ssa_def
*global_id
= nir_iadd(&b
, nir_imul(&b
, wg_id
, block_size
), invoc_id
);
27 nir_ssa_def
*offset
= nir_imul(&b
, global_id
, nir_imm_int(&b
, 16));
28 offset
= nir_swizzle(&b
, offset
, (unsigned[]) {0, 0, 0, 0}, 1, false);
30 nir_intrinsic_instr
*dst_buf
= nir_intrinsic_instr_create(b
.shader
,
31 nir_intrinsic_vulkan_resource_index
);
32 dst_buf
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
33 nir_intrinsic_set_desc_set(dst_buf
, 0);
34 nir_intrinsic_set_binding(dst_buf
, 0);
35 nir_ssa_dest_init(&dst_buf
->instr
, &dst_buf
->dest
, 1, 32, NULL
);
36 nir_builder_instr_insert(&b
, &dst_buf
->instr
);
38 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
39 nir_intrinsic_set_base(load
, 0);
40 nir_intrinsic_set_range(load
, 4);
41 load
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
42 load
->num_components
= 1;
43 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 1, 32, "fill_value");
44 nir_builder_instr_insert(&b
, &load
->instr
);
46 nir_ssa_def
*swizzled_load
= nir_swizzle(&b
, &load
->dest
.ssa
, (unsigned[]) { 0, 0, 0, 0}, 4, false);
48 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_store_ssbo
);
49 store
->src
[0] = nir_src_for_ssa(swizzled_load
);
50 store
->src
[1] = nir_src_for_ssa(&dst_buf
->dest
.ssa
);
51 store
->src
[2] = nir_src_for_ssa(offset
);
52 nir_intrinsic_set_write_mask(store
, 0xf);
53 store
->num_components
= 4;
54 nir_builder_instr_insert(&b
, &store
->instr
);
60 build_buffer_copy_shader(struct radv_device
*dev
)
64 nir_builder_init_simple_shader(&b
, NULL
, MESA_SHADER_COMPUTE
, NULL
);
65 b
.shader
->info
.name
= ralloc_strdup(b
.shader
, "meta_buffer_copy");
66 b
.shader
->info
.cs
.local_size
[0] = 64;
67 b
.shader
->info
.cs
.local_size
[1] = 1;
68 b
.shader
->info
.cs
.local_size
[2] = 1;
70 nir_ssa_def
*invoc_id
= nir_load_system_value(&b
, nir_intrinsic_load_local_invocation_id
, 0);
71 nir_ssa_def
*wg_id
= nir_load_system_value(&b
, nir_intrinsic_load_work_group_id
, 0);
72 nir_ssa_def
*block_size
= nir_imm_ivec4(&b
,
73 b
.shader
->info
.cs
.local_size
[0],
74 b
.shader
->info
.cs
.local_size
[1],
75 b
.shader
->info
.cs
.local_size
[2], 0);
77 nir_ssa_def
*global_id
= nir_iadd(&b
, nir_imul(&b
, wg_id
, block_size
), invoc_id
);
79 nir_ssa_def
*offset
= nir_imul(&b
, global_id
, nir_imm_int(&b
, 16));
80 offset
= nir_swizzle(&b
, offset
, (unsigned[]) {0, 0, 0, 0}, 1, false);
82 nir_intrinsic_instr
*dst_buf
= nir_intrinsic_instr_create(b
.shader
,
83 nir_intrinsic_vulkan_resource_index
);
84 dst_buf
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
85 nir_intrinsic_set_desc_set(dst_buf
, 0);
86 nir_intrinsic_set_binding(dst_buf
, 0);
87 nir_ssa_dest_init(&dst_buf
->instr
, &dst_buf
->dest
, 1, 32, NULL
);
88 nir_builder_instr_insert(&b
, &dst_buf
->instr
);
90 nir_intrinsic_instr
*src_buf
= nir_intrinsic_instr_create(b
.shader
,
91 nir_intrinsic_vulkan_resource_index
);
92 src_buf
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
93 nir_intrinsic_set_desc_set(src_buf
, 0);
94 nir_intrinsic_set_binding(src_buf
, 1);
95 nir_ssa_dest_init(&src_buf
->instr
, &src_buf
->dest
, 1, 32, NULL
);
96 nir_builder_instr_insert(&b
, &src_buf
->instr
);
98 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_ssbo
);
99 load
->src
[0] = nir_src_for_ssa(&src_buf
->dest
.ssa
);
100 load
->src
[1] = nir_src_for_ssa(offset
);
101 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, 32, NULL
);
102 load
->num_components
= 4;
103 nir_builder_instr_insert(&b
, &load
->instr
);
105 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_store_ssbo
);
106 store
->src
[0] = nir_src_for_ssa(&load
->dest
.ssa
);
107 store
->src
[1] = nir_src_for_ssa(&dst_buf
->dest
.ssa
);
108 store
->src
[2] = nir_src_for_ssa(offset
);
109 nir_intrinsic_set_write_mask(store
, 0xf);
110 store
->num_components
= 4;
111 nir_builder_instr_insert(&b
, &store
->instr
);
118 VkResult
radv_device_init_meta_buffer_state(struct radv_device
*device
)
121 struct radv_shader_module fill_cs
= { .nir
= NULL
};
122 struct radv_shader_module copy_cs
= { .nir
= NULL
};
124 zero(device
->meta_state
.buffer
);
126 fill_cs
.nir
= build_buffer_fill_shader(device
);
127 copy_cs
.nir
= build_buffer_copy_shader(device
);
129 VkDescriptorSetLayoutCreateInfo fill_ds_create_info
= {
130 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO
,
131 .flags
= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR
,
133 .pBindings
= (VkDescriptorSetLayoutBinding
[]) {
136 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
137 .descriptorCount
= 1,
138 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
139 .pImmutableSamplers
= NULL
144 result
= radv_CreateDescriptorSetLayout(radv_device_to_handle(device
),
145 &fill_ds_create_info
,
146 &device
->meta_state
.alloc
,
147 &device
->meta_state
.buffer
.fill_ds_layout
);
148 if (result
!= VK_SUCCESS
)
151 VkDescriptorSetLayoutCreateInfo copy_ds_create_info
= {
152 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO
,
153 .flags
= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR
,
155 .pBindings
= (VkDescriptorSetLayoutBinding
[]) {
158 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
159 .descriptorCount
= 1,
160 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
161 .pImmutableSamplers
= NULL
165 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
166 .descriptorCount
= 1,
167 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
168 .pImmutableSamplers
= NULL
173 result
= radv_CreateDescriptorSetLayout(radv_device_to_handle(device
),
174 ©_ds_create_info
,
175 &device
->meta_state
.alloc
,
176 &device
->meta_state
.buffer
.copy_ds_layout
);
177 if (result
!= VK_SUCCESS
)
181 VkPipelineLayoutCreateInfo fill_pl_create_info
= {
182 .sType
= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
,
184 .pSetLayouts
= &device
->meta_state
.buffer
.fill_ds_layout
,
185 .pushConstantRangeCount
= 1,
186 .pPushConstantRanges
= &(VkPushConstantRange
){VK_SHADER_STAGE_COMPUTE_BIT
, 0, 4},
189 result
= radv_CreatePipelineLayout(radv_device_to_handle(device
),
190 &fill_pl_create_info
,
191 &device
->meta_state
.alloc
,
192 &device
->meta_state
.buffer
.fill_p_layout
);
193 if (result
!= VK_SUCCESS
)
196 VkPipelineLayoutCreateInfo copy_pl_create_info
= {
197 .sType
= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
,
199 .pSetLayouts
= &device
->meta_state
.buffer
.copy_ds_layout
,
200 .pushConstantRangeCount
= 0,
203 result
= radv_CreatePipelineLayout(radv_device_to_handle(device
),
204 ©_pl_create_info
,
205 &device
->meta_state
.alloc
,
206 &device
->meta_state
.buffer
.copy_p_layout
);
207 if (result
!= VK_SUCCESS
)
210 VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage
= {
211 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
212 .stage
= VK_SHADER_STAGE_COMPUTE_BIT
,
213 .module
= radv_shader_module_to_handle(&fill_cs
),
215 .pSpecializationInfo
= NULL
,
218 VkComputePipelineCreateInfo fill_vk_pipeline_info
= {
219 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
220 .stage
= fill_pipeline_shader_stage
,
222 .layout
= device
->meta_state
.buffer
.fill_p_layout
,
225 result
= radv_CreateComputePipelines(radv_device_to_handle(device
),
226 radv_pipeline_cache_to_handle(&device
->meta_state
.cache
),
227 1, &fill_vk_pipeline_info
, NULL
,
228 &device
->meta_state
.buffer
.fill_pipeline
);
229 if (result
!= VK_SUCCESS
)
232 VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage
= {
233 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
234 .stage
= VK_SHADER_STAGE_COMPUTE_BIT
,
235 .module
= radv_shader_module_to_handle(©_cs
),
237 .pSpecializationInfo
= NULL
,
240 VkComputePipelineCreateInfo copy_vk_pipeline_info
= {
241 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
242 .stage
= copy_pipeline_shader_stage
,
244 .layout
= device
->meta_state
.buffer
.copy_p_layout
,
247 result
= radv_CreateComputePipelines(radv_device_to_handle(device
),
248 radv_pipeline_cache_to_handle(&device
->meta_state
.cache
),
249 1, ©_vk_pipeline_info
, NULL
,
250 &device
->meta_state
.buffer
.copy_pipeline
);
251 if (result
!= VK_SUCCESS
)
254 ralloc_free(fill_cs
.nir
);
255 ralloc_free(copy_cs
.nir
);
258 radv_device_finish_meta_buffer_state(device
);
259 ralloc_free(fill_cs
.nir
);
260 ralloc_free(copy_cs
.nir
);
264 void radv_device_finish_meta_buffer_state(struct radv_device
*device
)
266 if (device
->meta_state
.buffer
.copy_pipeline
)
267 radv_DestroyPipeline(radv_device_to_handle(device
),
268 device
->meta_state
.buffer
.copy_pipeline
,
269 &device
->meta_state
.alloc
);
271 if (device
->meta_state
.buffer
.fill_pipeline
)
272 radv_DestroyPipeline(radv_device_to_handle(device
),
273 device
->meta_state
.buffer
.fill_pipeline
,
274 &device
->meta_state
.alloc
);
276 if (device
->meta_state
.buffer
.copy_p_layout
)
277 radv_DestroyPipelineLayout(radv_device_to_handle(device
),
278 device
->meta_state
.buffer
.copy_p_layout
,
279 &device
->meta_state
.alloc
);
281 if (device
->meta_state
.buffer
.fill_p_layout
)
282 radv_DestroyPipelineLayout(radv_device_to_handle(device
),
283 device
->meta_state
.buffer
.fill_p_layout
,
284 &device
->meta_state
.alloc
);
286 if (device
->meta_state
.buffer
.copy_ds_layout
)
287 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device
),
288 device
->meta_state
.buffer
.copy_ds_layout
,
289 &device
->meta_state
.alloc
);
291 if (device
->meta_state
.buffer
.fill_ds_layout
)
292 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device
),
293 device
->meta_state
.buffer
.fill_ds_layout
,
294 &device
->meta_state
.alloc
);
297 static void fill_buffer_shader(struct radv_cmd_buffer
*cmd_buffer
,
298 struct radeon_winsys_bo
*bo
,
299 uint64_t offset
, uint64_t size
, uint32_t value
)
301 struct radv_device
*device
= cmd_buffer
->device
;
302 uint64_t block_count
= round_up_u64(size
, 1024);
303 struct radv_meta_saved_compute_state saved_state
;
305 radv_meta_save_compute(&saved_state
, cmd_buffer
, 4);
307 struct radv_buffer dst_buffer
= {
313 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer
),
314 VK_PIPELINE_BIND_POINT_COMPUTE
,
315 device
->meta_state
.buffer
.fill_pipeline
);
317 radv_meta_push_descriptor_set(cmd_buffer
, VK_PIPELINE_BIND_POINT_COMPUTE
,
318 device
->meta_state
.buffer
.fill_p_layout
,
320 1, /* descriptorWriteCount */
321 (VkWriteDescriptorSet
[]) {
323 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
325 .dstArrayElement
= 0,
326 .descriptorCount
= 1,
327 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
328 .pBufferInfo
= &(VkDescriptorBufferInfo
) {
329 .buffer
= radv_buffer_to_handle(&dst_buffer
),
336 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer
),
337 device
->meta_state
.buffer
.fill_p_layout
,
338 VK_SHADER_STAGE_COMPUTE_BIT
, 0, 4,
341 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer
), block_count
, 1, 1);
343 radv_meta_restore_compute(&saved_state
, cmd_buffer
, 4);
346 static void copy_buffer_shader(struct radv_cmd_buffer
*cmd_buffer
,
347 struct radeon_winsys_bo
*src_bo
,
348 struct radeon_winsys_bo
*dst_bo
,
349 uint64_t src_offset
, uint64_t dst_offset
,
352 struct radv_device
*device
= cmd_buffer
->device
;
353 uint64_t block_count
= round_up_u64(size
, 1024);
354 struct radv_meta_saved_compute_state saved_state
;
356 radv_meta_save_compute(&saved_state
, cmd_buffer
, 0);
358 struct radv_buffer dst_buffer
= {
360 .offset
= dst_offset
,
364 struct radv_buffer src_buffer
= {
366 .offset
= src_offset
,
370 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer
),
371 VK_PIPELINE_BIND_POINT_COMPUTE
,
372 device
->meta_state
.buffer
.copy_pipeline
);
374 radv_meta_push_descriptor_set(cmd_buffer
, VK_PIPELINE_BIND_POINT_COMPUTE
,
375 device
->meta_state
.buffer
.copy_p_layout
,
377 2, /* descriptorWriteCount */
378 (VkWriteDescriptorSet
[]) {
380 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
382 .dstArrayElement
= 0,
383 .descriptorCount
= 1,
384 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
385 .pBufferInfo
= &(VkDescriptorBufferInfo
) {
386 .buffer
= radv_buffer_to_handle(&dst_buffer
),
392 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
394 .dstArrayElement
= 0,
395 .descriptorCount
= 1,
396 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
397 .pBufferInfo
= &(VkDescriptorBufferInfo
) {
398 .buffer
= radv_buffer_to_handle(&src_buffer
),
405 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer
), block_count
, 1, 1);
407 radv_meta_restore_compute(&saved_state
, cmd_buffer
, 0);
411 void radv_fill_buffer(struct radv_cmd_buffer
*cmd_buffer
,
412 struct radeon_winsys_bo
*bo
,
413 uint64_t offset
, uint64_t size
, uint32_t value
)
415 assert(!(offset
& 3));
419 fill_buffer_shader(cmd_buffer
, bo
, offset
, size
, value
);
421 uint64_t va
= cmd_buffer
->device
->ws
->buffer_get_va(bo
);
423 cmd_buffer
->device
->ws
->cs_add_buffer(cmd_buffer
->cs
, bo
, 8);
424 si_cp_dma_clear_buffer(cmd_buffer
, va
, size
, value
);
429 void radv_copy_buffer(struct radv_cmd_buffer
*cmd_buffer
,
430 struct radeon_winsys_bo
*src_bo
,
431 struct radeon_winsys_bo
*dst_bo
,
432 uint64_t src_offset
, uint64_t dst_offset
,
435 if (size
>= 4096 && !(size
& 3) && !(src_offset
& 3) && !(dst_offset
& 3))
436 copy_buffer_shader(cmd_buffer
, src_bo
, dst_bo
,
437 src_offset
, dst_offset
, size
);
439 uint64_t src_va
= cmd_buffer
->device
->ws
->buffer_get_va(src_bo
);
440 uint64_t dst_va
= cmd_buffer
->device
->ws
->buffer_get_va(dst_bo
);
441 src_va
+= src_offset
;
442 dst_va
+= dst_offset
;
444 cmd_buffer
->device
->ws
->cs_add_buffer(cmd_buffer
->cs
, src_bo
, 8);
445 cmd_buffer
->device
->ws
->cs_add_buffer(cmd_buffer
->cs
, dst_bo
, 8);
447 si_cp_dma_buffer_copy(cmd_buffer
, src_va
, dst_va
, size
);
451 void radv_CmdFillBuffer(
452 VkCommandBuffer commandBuffer
,
454 VkDeviceSize dstOffset
,
455 VkDeviceSize fillSize
,
458 RADV_FROM_HANDLE(radv_cmd_buffer
, cmd_buffer
, commandBuffer
);
459 RADV_FROM_HANDLE(radv_buffer
, dst_buffer
, dstBuffer
);
461 if (fillSize
== VK_WHOLE_SIZE
)
462 fillSize
= (dst_buffer
->size
- dstOffset
) & ~3ull;
464 radv_fill_buffer(cmd_buffer
, dst_buffer
->bo
, dst_buffer
->offset
+ dstOffset
,
468 void radv_CmdCopyBuffer(
469 VkCommandBuffer commandBuffer
,
472 uint32_t regionCount
,
473 const VkBufferCopy
* pRegions
)
475 RADV_FROM_HANDLE(radv_cmd_buffer
, cmd_buffer
, commandBuffer
);
476 RADV_FROM_HANDLE(radv_buffer
, src_buffer
, srcBuffer
);
477 RADV_FROM_HANDLE(radv_buffer
, dest_buffer
, destBuffer
);
479 for (unsigned r
= 0; r
< regionCount
; r
++) {
480 uint64_t src_offset
= src_buffer
->offset
+ pRegions
[r
].srcOffset
;
481 uint64_t dest_offset
= dest_buffer
->offset
+ pRegions
[r
].dstOffset
;
482 uint64_t copy_size
= pRegions
[r
].size
;
484 radv_copy_buffer(cmd_buffer
, src_buffer
->bo
, dest_buffer
->bo
,
485 src_offset
, dest_offset
, copy_size
);
489 void radv_CmdUpdateBuffer(
490 VkCommandBuffer commandBuffer
,
492 VkDeviceSize dstOffset
,
493 VkDeviceSize dataSize
,
496 RADV_FROM_HANDLE(radv_cmd_buffer
, cmd_buffer
, commandBuffer
);
497 RADV_FROM_HANDLE(radv_buffer
, dst_buffer
, dstBuffer
);
498 bool mec
= radv_cmd_buffer_uses_mec(cmd_buffer
);
499 uint64_t words
= dataSize
/ 4;
500 uint64_t va
= cmd_buffer
->device
->ws
->buffer_get_va(dst_buffer
->bo
);
501 va
+= dstOffset
+ dst_buffer
->offset
;
503 assert(!(dataSize
& 3));
509 if (dataSize
< 4096) {
510 si_emit_cache_flush(cmd_buffer
);
512 cmd_buffer
->device
->ws
->cs_add_buffer(cmd_buffer
->cs
, dst_buffer
->bo
, 8);
514 radeon_check_space(cmd_buffer
->device
->ws
, cmd_buffer
->cs
, words
+ 4);
516 radeon_emit(cmd_buffer
->cs
, PKT3(PKT3_WRITE_DATA
, 2 + words
, 0));
517 radeon_emit(cmd_buffer
->cs
, S_370_DST_SEL(mec
?
518 V_370_MEM_ASYNC
: V_370_MEMORY_SYNC
) |
519 S_370_WR_CONFIRM(1) |
520 S_370_ENGINE_SEL(V_370_ME
));
521 radeon_emit(cmd_buffer
->cs
, va
);
522 radeon_emit(cmd_buffer
->cs
, va
>> 32);
523 radeon_emit_array(cmd_buffer
->cs
, pData
, words
);
525 radv_cmd_buffer_trace_emit(cmd_buffer
);
528 radv_cmd_buffer_upload_data(cmd_buffer
, dataSize
, 32, pData
, &buf_offset
);
529 radv_copy_buffer(cmd_buffer
, cmd_buffer
->upload
.upload_bo
, dst_buffer
->bo
,
530 buf_offset
, dstOffset
+ dst_buffer
->offset
, dataSize
);