2 #include "nir/nir_builder.h"
8 build_buffer_fill_shader(struct radv_device
*dev
)
12 nir_builder_init_simple_shader(&b
, NULL
, MESA_SHADER_COMPUTE
, NULL
);
13 b
.shader
->info
.name
= ralloc_strdup(b
.shader
, "meta_buffer_fill");
14 b
.shader
->info
.cs
.local_size
[0] = 64;
15 b
.shader
->info
.cs
.local_size
[1] = 1;
16 b
.shader
->info
.cs
.local_size
[2] = 1;
18 nir_ssa_def
*invoc_id
= nir_load_local_invocation_id(&b
);
19 nir_ssa_def
*wg_id
= nir_load_work_group_id(&b
);
20 nir_ssa_def
*block_size
= nir_imm_ivec4(&b
,
21 b
.shader
->info
.cs
.local_size
[0],
22 b
.shader
->info
.cs
.local_size
[1],
23 b
.shader
->info
.cs
.local_size
[2], 0);
25 nir_ssa_def
*global_id
= nir_iadd(&b
, nir_imul(&b
, wg_id
, block_size
), invoc_id
);
27 nir_ssa_def
*offset
= nir_imul(&b
, global_id
, nir_imm_int(&b
, 16));
28 offset
= nir_channel(&b
, offset
, 0);
30 nir_intrinsic_instr
*dst_buf
= nir_intrinsic_instr_create(b
.shader
,
31 nir_intrinsic_vulkan_resource_index
);
32 dst_buf
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
33 dst_buf
->num_components
= 1;
34 nir_intrinsic_set_desc_set(dst_buf
, 0);
35 nir_intrinsic_set_binding(dst_buf
, 0);
36 nir_ssa_dest_init(&dst_buf
->instr
, &dst_buf
->dest
, dst_buf
->num_components
, 32, NULL
);
37 nir_builder_instr_insert(&b
, &dst_buf
->instr
);
39 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_push_constant
);
40 nir_intrinsic_set_base(load
, 0);
41 nir_intrinsic_set_range(load
, 4);
42 load
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
43 load
->num_components
= 1;
44 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 1, 32, "fill_value");
45 nir_builder_instr_insert(&b
, &load
->instr
);
47 nir_ssa_def
*swizzled_load
= nir_swizzle(&b
, &load
->dest
.ssa
, (unsigned[]) { 0, 0, 0, 0}, 4);
49 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_store_ssbo
);
50 store
->src
[0] = nir_src_for_ssa(swizzled_load
);
51 store
->src
[1] = nir_src_for_ssa(&dst_buf
->dest
.ssa
);
52 store
->src
[2] = nir_src_for_ssa(offset
);
53 nir_intrinsic_set_write_mask(store
, 0xf);
54 nir_intrinsic_set_access(store
, ACCESS_NON_READABLE
);
55 store
->num_components
= 4;
56 nir_builder_instr_insert(&b
, &store
->instr
);
62 build_buffer_copy_shader(struct radv_device
*dev
)
66 nir_builder_init_simple_shader(&b
, NULL
, MESA_SHADER_COMPUTE
, NULL
);
67 b
.shader
->info
.name
= ralloc_strdup(b
.shader
, "meta_buffer_copy");
68 b
.shader
->info
.cs
.local_size
[0] = 64;
69 b
.shader
->info
.cs
.local_size
[1] = 1;
70 b
.shader
->info
.cs
.local_size
[2] = 1;
72 nir_ssa_def
*invoc_id
= nir_load_local_invocation_id(&b
);
73 nir_ssa_def
*wg_id
= nir_load_work_group_id(&b
);
74 nir_ssa_def
*block_size
= nir_imm_ivec4(&b
,
75 b
.shader
->info
.cs
.local_size
[0],
76 b
.shader
->info
.cs
.local_size
[1],
77 b
.shader
->info
.cs
.local_size
[2], 0);
79 nir_ssa_def
*global_id
= nir_iadd(&b
, nir_imul(&b
, wg_id
, block_size
), invoc_id
);
81 nir_ssa_def
*offset
= nir_imul(&b
, global_id
, nir_imm_int(&b
, 16));
82 offset
= nir_channel(&b
, offset
, 0);
84 nir_intrinsic_instr
*dst_buf
= nir_intrinsic_instr_create(b
.shader
,
85 nir_intrinsic_vulkan_resource_index
);
86 dst_buf
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
87 dst_buf
->num_components
= 1;
88 nir_intrinsic_set_desc_set(dst_buf
, 0);
89 nir_intrinsic_set_binding(dst_buf
, 0);
90 nir_ssa_dest_init(&dst_buf
->instr
, &dst_buf
->dest
, dst_buf
->num_components
, 32, NULL
);
91 nir_builder_instr_insert(&b
, &dst_buf
->instr
);
93 nir_intrinsic_instr
*src_buf
= nir_intrinsic_instr_create(b
.shader
,
94 nir_intrinsic_vulkan_resource_index
);
95 src_buf
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
96 src_buf
->num_components
= 1;
97 nir_intrinsic_set_desc_set(src_buf
, 0);
98 nir_intrinsic_set_binding(src_buf
, 1);
99 nir_ssa_dest_init(&src_buf
->instr
, &src_buf
->dest
, src_buf
->num_components
, 32, NULL
);
100 nir_builder_instr_insert(&b
, &src_buf
->instr
);
102 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_ssbo
);
103 load
->src
[0] = nir_src_for_ssa(&src_buf
->dest
.ssa
);
104 load
->src
[1] = nir_src_for_ssa(offset
);
105 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, 32, NULL
);
106 load
->num_components
= 4;
107 nir_builder_instr_insert(&b
, &load
->instr
);
109 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_store_ssbo
);
110 store
->src
[0] = nir_src_for_ssa(&load
->dest
.ssa
);
111 store
->src
[1] = nir_src_for_ssa(&dst_buf
->dest
.ssa
);
112 store
->src
[2] = nir_src_for_ssa(offset
);
113 nir_intrinsic_set_write_mask(store
, 0xf);
114 nir_intrinsic_set_access(store
, ACCESS_NON_READABLE
);
115 store
->num_components
= 4;
116 nir_builder_instr_insert(&b
, &store
->instr
);
123 VkResult
radv_device_init_meta_buffer_state(struct radv_device
*device
)
126 struct radv_shader_module fill_cs
= { .nir
= NULL
};
127 struct radv_shader_module copy_cs
= { .nir
= NULL
};
129 fill_cs
.nir
= build_buffer_fill_shader(device
);
130 copy_cs
.nir
= build_buffer_copy_shader(device
);
132 VkDescriptorSetLayoutCreateInfo fill_ds_create_info
= {
133 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO
,
134 .flags
= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR
,
136 .pBindings
= (VkDescriptorSetLayoutBinding
[]) {
139 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
140 .descriptorCount
= 1,
141 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
142 .pImmutableSamplers
= NULL
147 result
= radv_CreateDescriptorSetLayout(radv_device_to_handle(device
),
148 &fill_ds_create_info
,
149 &device
->meta_state
.alloc
,
150 &device
->meta_state
.buffer
.fill_ds_layout
);
151 if (result
!= VK_SUCCESS
)
154 VkDescriptorSetLayoutCreateInfo copy_ds_create_info
= {
155 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO
,
156 .flags
= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR
,
158 .pBindings
= (VkDescriptorSetLayoutBinding
[]) {
161 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
162 .descriptorCount
= 1,
163 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
164 .pImmutableSamplers
= NULL
168 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
169 .descriptorCount
= 1,
170 .stageFlags
= VK_SHADER_STAGE_COMPUTE_BIT
,
171 .pImmutableSamplers
= NULL
176 result
= radv_CreateDescriptorSetLayout(radv_device_to_handle(device
),
177 ©_ds_create_info
,
178 &device
->meta_state
.alloc
,
179 &device
->meta_state
.buffer
.copy_ds_layout
);
180 if (result
!= VK_SUCCESS
)
184 VkPipelineLayoutCreateInfo fill_pl_create_info
= {
185 .sType
= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
,
187 .pSetLayouts
= &device
->meta_state
.buffer
.fill_ds_layout
,
188 .pushConstantRangeCount
= 1,
189 .pPushConstantRanges
= &(VkPushConstantRange
){VK_SHADER_STAGE_COMPUTE_BIT
, 0, 4},
192 result
= radv_CreatePipelineLayout(radv_device_to_handle(device
),
193 &fill_pl_create_info
,
194 &device
->meta_state
.alloc
,
195 &device
->meta_state
.buffer
.fill_p_layout
);
196 if (result
!= VK_SUCCESS
)
199 VkPipelineLayoutCreateInfo copy_pl_create_info
= {
200 .sType
= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
,
202 .pSetLayouts
= &device
->meta_state
.buffer
.copy_ds_layout
,
203 .pushConstantRangeCount
= 0,
206 result
= radv_CreatePipelineLayout(radv_device_to_handle(device
),
207 ©_pl_create_info
,
208 &device
->meta_state
.alloc
,
209 &device
->meta_state
.buffer
.copy_p_layout
);
210 if (result
!= VK_SUCCESS
)
213 VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage
= {
214 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
215 .stage
= VK_SHADER_STAGE_COMPUTE_BIT
,
216 .module
= radv_shader_module_to_handle(&fill_cs
),
218 .pSpecializationInfo
= NULL
,
221 VkComputePipelineCreateInfo fill_vk_pipeline_info
= {
222 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
223 .stage
= fill_pipeline_shader_stage
,
225 .layout
= device
->meta_state
.buffer
.fill_p_layout
,
228 result
= radv_CreateComputePipelines(radv_device_to_handle(device
),
229 radv_pipeline_cache_to_handle(&device
->meta_state
.cache
),
230 1, &fill_vk_pipeline_info
, NULL
,
231 &device
->meta_state
.buffer
.fill_pipeline
);
232 if (result
!= VK_SUCCESS
)
235 VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage
= {
236 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
237 .stage
= VK_SHADER_STAGE_COMPUTE_BIT
,
238 .module
= radv_shader_module_to_handle(©_cs
),
240 .pSpecializationInfo
= NULL
,
243 VkComputePipelineCreateInfo copy_vk_pipeline_info
= {
244 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
245 .stage
= copy_pipeline_shader_stage
,
247 .layout
= device
->meta_state
.buffer
.copy_p_layout
,
250 result
= radv_CreateComputePipelines(radv_device_to_handle(device
),
251 radv_pipeline_cache_to_handle(&device
->meta_state
.cache
),
252 1, ©_vk_pipeline_info
, NULL
,
253 &device
->meta_state
.buffer
.copy_pipeline
);
254 if (result
!= VK_SUCCESS
)
257 ralloc_free(fill_cs
.nir
);
258 ralloc_free(copy_cs
.nir
);
261 radv_device_finish_meta_buffer_state(device
);
262 ralloc_free(fill_cs
.nir
);
263 ralloc_free(copy_cs
.nir
);
267 void radv_device_finish_meta_buffer_state(struct radv_device
*device
)
269 struct radv_meta_state
*state
= &device
->meta_state
;
271 radv_DestroyPipeline(radv_device_to_handle(device
),
272 state
->buffer
.copy_pipeline
, &state
->alloc
);
273 radv_DestroyPipeline(radv_device_to_handle(device
),
274 state
->buffer
.fill_pipeline
, &state
->alloc
);
275 radv_DestroyPipelineLayout(radv_device_to_handle(device
),
276 state
->buffer
.copy_p_layout
, &state
->alloc
);
277 radv_DestroyPipelineLayout(radv_device_to_handle(device
),
278 state
->buffer
.fill_p_layout
, &state
->alloc
);
279 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device
),
280 state
->buffer
.copy_ds_layout
,
282 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device
),
283 state
->buffer
.fill_ds_layout
,
287 static void fill_buffer_shader(struct radv_cmd_buffer
*cmd_buffer
,
288 struct radeon_winsys_bo
*bo
,
289 uint64_t offset
, uint64_t size
, uint32_t value
)
291 struct radv_device
*device
= cmd_buffer
->device
;
292 uint64_t block_count
= round_up_u64(size
, 1024);
293 struct radv_meta_saved_state saved_state
;
295 radv_meta_save(&saved_state
, cmd_buffer
,
296 RADV_META_SAVE_COMPUTE_PIPELINE
|
297 RADV_META_SAVE_CONSTANTS
|
298 RADV_META_SAVE_DESCRIPTORS
);
300 struct radv_buffer dst_buffer
= {
306 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer
),
307 VK_PIPELINE_BIND_POINT_COMPUTE
,
308 device
->meta_state
.buffer
.fill_pipeline
);
310 radv_meta_push_descriptor_set(cmd_buffer
, VK_PIPELINE_BIND_POINT_COMPUTE
,
311 device
->meta_state
.buffer
.fill_p_layout
,
313 1, /* descriptorWriteCount */
314 (VkWriteDescriptorSet
[]) {
316 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
318 .dstArrayElement
= 0,
319 .descriptorCount
= 1,
320 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
321 .pBufferInfo
= &(VkDescriptorBufferInfo
) {
322 .buffer
= radv_buffer_to_handle(&dst_buffer
),
329 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer
),
330 device
->meta_state
.buffer
.fill_p_layout
,
331 VK_SHADER_STAGE_COMPUTE_BIT
, 0, 4,
334 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer
), block_count
, 1, 1);
336 radv_meta_restore(&saved_state
, cmd_buffer
);
339 static void copy_buffer_shader(struct radv_cmd_buffer
*cmd_buffer
,
340 struct radeon_winsys_bo
*src_bo
,
341 struct radeon_winsys_bo
*dst_bo
,
342 uint64_t src_offset
, uint64_t dst_offset
,
345 struct radv_device
*device
= cmd_buffer
->device
;
346 uint64_t block_count
= round_up_u64(size
, 1024);
347 struct radv_meta_saved_state saved_state
;
349 radv_meta_save(&saved_state
, cmd_buffer
,
350 RADV_META_SAVE_COMPUTE_PIPELINE
|
351 RADV_META_SAVE_DESCRIPTORS
);
353 struct radv_buffer dst_buffer
= {
355 .offset
= dst_offset
,
359 struct radv_buffer src_buffer
= {
361 .offset
= src_offset
,
365 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer
),
366 VK_PIPELINE_BIND_POINT_COMPUTE
,
367 device
->meta_state
.buffer
.copy_pipeline
);
369 radv_meta_push_descriptor_set(cmd_buffer
, VK_PIPELINE_BIND_POINT_COMPUTE
,
370 device
->meta_state
.buffer
.copy_p_layout
,
372 2, /* descriptorWriteCount */
373 (VkWriteDescriptorSet
[]) {
375 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
377 .dstArrayElement
= 0,
378 .descriptorCount
= 1,
379 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
380 .pBufferInfo
= &(VkDescriptorBufferInfo
) {
381 .buffer
= radv_buffer_to_handle(&dst_buffer
),
387 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
389 .dstArrayElement
= 0,
390 .descriptorCount
= 1,
391 .descriptorType
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
,
392 .pBufferInfo
= &(VkDescriptorBufferInfo
) {
393 .buffer
= radv_buffer_to_handle(&src_buffer
),
400 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer
), block_count
, 1, 1);
402 radv_meta_restore(&saved_state
, cmd_buffer
);
406 uint32_t radv_fill_buffer(struct radv_cmd_buffer
*cmd_buffer
,
407 struct radeon_winsys_bo
*bo
,
408 uint64_t offset
, uint64_t size
, uint32_t value
)
410 uint32_t flush_bits
= 0;
412 assert(!(offset
& 3));
415 if (size
>= RADV_BUFFER_OPS_CS_THRESHOLD
) {
416 fill_buffer_shader(cmd_buffer
, bo
, offset
, size
, value
);
417 flush_bits
= RADV_CMD_FLAG_CS_PARTIAL_FLUSH
|
418 RADV_CMD_FLAG_INV_VCACHE
|
421 uint64_t va
= radv_buffer_get_va(bo
);
423 radv_cs_add_buffer(cmd_buffer
->device
->ws
, cmd_buffer
->cs
, bo
);
424 si_cp_dma_clear_buffer(cmd_buffer
, va
, size
, value
);
431 void radv_copy_buffer(struct radv_cmd_buffer
*cmd_buffer
,
432 struct radeon_winsys_bo
*src_bo
,
433 struct radeon_winsys_bo
*dst_bo
,
434 uint64_t src_offset
, uint64_t dst_offset
,
437 if (size
>= RADV_BUFFER_OPS_CS_THRESHOLD
&& !(size
& 3) && !(src_offset
& 3) && !(dst_offset
& 3))
438 copy_buffer_shader(cmd_buffer
, src_bo
, dst_bo
,
439 src_offset
, dst_offset
, size
);
441 uint64_t src_va
= radv_buffer_get_va(src_bo
);
442 uint64_t dst_va
= radv_buffer_get_va(dst_bo
);
443 src_va
+= src_offset
;
444 dst_va
+= dst_offset
;
446 radv_cs_add_buffer(cmd_buffer
->device
->ws
, cmd_buffer
->cs
, src_bo
);
447 radv_cs_add_buffer(cmd_buffer
->device
->ws
, cmd_buffer
->cs
, dst_bo
);
449 si_cp_dma_buffer_copy(cmd_buffer
, src_va
, dst_va
, size
);
453 void radv_CmdFillBuffer(
454 VkCommandBuffer commandBuffer
,
456 VkDeviceSize dstOffset
,
457 VkDeviceSize fillSize
,
460 RADV_FROM_HANDLE(radv_cmd_buffer
, cmd_buffer
, commandBuffer
);
461 RADV_FROM_HANDLE(radv_buffer
, dst_buffer
, dstBuffer
);
463 if (fillSize
== VK_WHOLE_SIZE
)
464 fillSize
= (dst_buffer
->size
- dstOffset
) & ~3ull;
466 radv_fill_buffer(cmd_buffer
, dst_buffer
->bo
, dst_buffer
->offset
+ dstOffset
,
470 void radv_CmdCopyBuffer(
471 VkCommandBuffer commandBuffer
,
474 uint32_t regionCount
,
475 const VkBufferCopy
* pRegions
)
477 RADV_FROM_HANDLE(radv_cmd_buffer
, cmd_buffer
, commandBuffer
);
478 RADV_FROM_HANDLE(radv_buffer
, src_buffer
, srcBuffer
);
479 RADV_FROM_HANDLE(radv_buffer
, dest_buffer
, destBuffer
);
480 bool old_predicating
;
482 /* VK_EXT_conditional_rendering says that copy commands should not be
483 * affected by conditional rendering.
485 old_predicating
= cmd_buffer
->state
.predicating
;
486 cmd_buffer
->state
.predicating
= false;
488 for (unsigned r
= 0; r
< regionCount
; r
++) {
489 uint64_t src_offset
= src_buffer
->offset
+ pRegions
[r
].srcOffset
;
490 uint64_t dest_offset
= dest_buffer
->offset
+ pRegions
[r
].dstOffset
;
491 uint64_t copy_size
= pRegions
[r
].size
;
493 radv_copy_buffer(cmd_buffer
, src_buffer
->bo
, dest_buffer
->bo
,
494 src_offset
, dest_offset
, copy_size
);
497 /* Restore conditional rendering. */
498 cmd_buffer
->state
.predicating
= old_predicating
;
501 void radv_CmdUpdateBuffer(
502 VkCommandBuffer commandBuffer
,
504 VkDeviceSize dstOffset
,
505 VkDeviceSize dataSize
,
508 RADV_FROM_HANDLE(radv_cmd_buffer
, cmd_buffer
, commandBuffer
);
509 RADV_FROM_HANDLE(radv_buffer
, dst_buffer
, dstBuffer
);
510 bool mec
= radv_cmd_buffer_uses_mec(cmd_buffer
);
511 uint64_t words
= dataSize
/ 4;
512 uint64_t va
= radv_buffer_get_va(dst_buffer
->bo
);
513 va
+= dstOffset
+ dst_buffer
->offset
;
515 assert(!(dataSize
& 3));
521 if (dataSize
< RADV_BUFFER_UPDATE_THRESHOLD
) {
522 si_emit_cache_flush(cmd_buffer
);
524 radv_cs_add_buffer(cmd_buffer
->device
->ws
, cmd_buffer
->cs
, dst_buffer
->bo
);
526 radeon_check_space(cmd_buffer
->device
->ws
, cmd_buffer
->cs
, words
+ 4);
528 radeon_emit(cmd_buffer
->cs
, PKT3(PKT3_WRITE_DATA
, 2 + words
, 0));
529 radeon_emit(cmd_buffer
->cs
, S_370_DST_SEL(mec
?
530 V_370_MEM
: V_370_MEM_GRBM
) |
531 S_370_WR_CONFIRM(1) |
532 S_370_ENGINE_SEL(V_370_ME
));
533 radeon_emit(cmd_buffer
->cs
, va
);
534 radeon_emit(cmd_buffer
->cs
, va
>> 32);
535 radeon_emit_array(cmd_buffer
->cs
, pData
, words
);
537 if (unlikely(cmd_buffer
->device
->trace_bo
))
538 radv_cmd_buffer_trace_emit(cmd_buffer
);
541 radv_cmd_buffer_upload_data(cmd_buffer
, dataSize
, 32, pData
, &buf_offset
);
542 radv_copy_buffer(cmd_buffer
, cmd_buffer
->upload
.upload_bo
, dst_buffer
->bo
,
543 buf_offset
, dstOffset
+ dst_buffer
->offset
, dataSize
);