radv: add initial non-conformant radv vulkan driver
[mesa.git] / src / amd / vulkan / radv_meta_buffer.c
1 #include "radv_meta.h"
2 #include "nir/nir_builder.h"
3
4 #include "sid.h"
5 #include "radv_cs.h"
6
7 static nir_shader *
8 build_buffer_fill_shader(struct radv_device *dev)
9 {
10 nir_builder b;
11
12 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
13 b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
14 b.shader->info.cs.local_size[0] = 64;
15 b.shader->info.cs.local_size[1] = 1;
16 b.shader->info.cs.local_size[2] = 1;
17
18 nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
19 nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
20 nir_ssa_def *block_size = nir_imm_ivec4(&b,
21 b.shader->info.cs.local_size[0],
22 b.shader->info.cs.local_size[1],
23 b.shader->info.cs.local_size[2], 0);
24
25 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
26
27 nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
28 offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);
29
30 nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
31 nir_intrinsic_vulkan_resource_index);
32 dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
33 nir_intrinsic_set_desc_set(dst_buf, 0);
34 nir_intrinsic_set_binding(dst_buf, 0);
35 nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
36 nir_builder_instr_insert(&b, &dst_buf->instr);
37
38 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
39 load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
40 load->num_components = 1;
41 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
42 nir_builder_instr_insert(&b, &load->instr);
43
44 nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4, false);
45
46 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
47 store->src[0] = nir_src_for_ssa(swizzled_load);
48 store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
49 store->src[2] = nir_src_for_ssa(offset);
50 nir_intrinsic_set_write_mask(store, 0xf);
51 store->num_components = 4;
52 nir_builder_instr_insert(&b, &store->instr);
53
54 return b.shader;
55 }
56
57 static nir_shader *
58 build_buffer_copy_shader(struct radv_device *dev)
59 {
60 nir_builder b;
61
62 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
63 b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
64 b.shader->info.cs.local_size[0] = 64;
65 b.shader->info.cs.local_size[1] = 1;
66 b.shader->info.cs.local_size[2] = 1;
67
68 nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
69 nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
70 nir_ssa_def *block_size = nir_imm_ivec4(&b,
71 b.shader->info.cs.local_size[0],
72 b.shader->info.cs.local_size[1],
73 b.shader->info.cs.local_size[2], 0);
74
75 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
76
77 nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
78 offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);
79
80 nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
81 nir_intrinsic_vulkan_resource_index);
82 dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
83 nir_intrinsic_set_desc_set(dst_buf, 0);
84 nir_intrinsic_set_binding(dst_buf, 0);
85 nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
86 nir_builder_instr_insert(&b, &dst_buf->instr);
87
88 nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
89 nir_intrinsic_vulkan_resource_index);
90 src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
91 nir_intrinsic_set_desc_set(src_buf, 0);
92 nir_intrinsic_set_binding(src_buf, 1);
93 nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
94 nir_builder_instr_insert(&b, &src_buf->instr);
95
96 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
97 load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
98 load->src[1] = nir_src_for_ssa(offset);
99 nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
100 load->num_components = 4;
101 nir_builder_instr_insert(&b, &load->instr);
102
103 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
104 store->src[0] = nir_src_for_ssa(&load->dest.ssa);
105 store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
106 store->src[2] = nir_src_for_ssa(offset);
107 nir_intrinsic_set_write_mask(store, 0xf);
108 store->num_components = 4;
109 nir_builder_instr_insert(&b, &store->instr);
110
111 return b.shader;
112 }
113
114
115
/* Create the device-level meta state used by buffer fill/copy: two compute
 * shaders, their descriptor-set layouts, pipeline layouts and pipelines.
 * On any failure, radv_device_finish_meta_buffer_state() tears down whatever
 * was created so far (safe because the state is zeroed up front).
 * The NIR shaders are freed on both success and failure paths.
 */
VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module fill_cs = { .nir = NULL };
	struct radv_shader_module copy_cs = { .nir = NULL };

	/* Zero all handles so the error-path cleanup can test them. */
	zero(device->meta_state.buffer);

	fill_cs.nir = build_buffer_fill_shader(device);
	copy_cs.nir = build_buffer_copy_shader(device);

	/* Fill shader: a single storage buffer (the destination). */
	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&fill_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.fill_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* Copy shader: destination at binding 0, source at binding 1
	 * (matches build_buffer_copy_shader). */
	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&copy_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.copy_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	/* Fill layout carries a 4-byte push-constant range for the fill value. */
	VkPipelineLayoutCreateInfo fill_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &fill_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.fill_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* Copy layout needs no push constants. */
	VkPipelineLayoutCreateInfo copy_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
		.pushConstantRangeCount = 0,
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &copy_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.copy_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&fill_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = fill_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.fill_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &fill_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.fill_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&copy_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = copy_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.copy_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &copy_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.copy_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	/* The pipelines own compiled code now; the NIR is no longer needed. */
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return VK_SUCCESS;
fail:
	radv_device_finish_meta_buffer_state(device);
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return result;
}
259
260 void radv_device_finish_meta_buffer_state(struct radv_device *device)
261 {
262 if (device->meta_state.buffer.copy_pipeline)
263 radv_DestroyPipeline(radv_device_to_handle(device),
264 device->meta_state.buffer.copy_pipeline,
265 &device->meta_state.alloc);
266
267 if (device->meta_state.buffer.fill_pipeline)
268 radv_DestroyPipeline(radv_device_to_handle(device),
269 device->meta_state.buffer.fill_pipeline,
270 &device->meta_state.alloc);
271
272 if (device->meta_state.buffer.copy_p_layout)
273 radv_DestroyPipelineLayout(radv_device_to_handle(device),
274 device->meta_state.buffer.copy_p_layout,
275 &device->meta_state.alloc);
276
277 if (device->meta_state.buffer.fill_p_layout)
278 radv_DestroyPipelineLayout(radv_device_to_handle(device),
279 device->meta_state.buffer.fill_p_layout,
280 &device->meta_state.alloc);
281
282 if (device->meta_state.buffer.copy_ds_layout)
283 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
284 device->meta_state.buffer.copy_ds_layout,
285 &device->meta_state.alloc);
286
287 if (device->meta_state.buffer.fill_ds_layout)
288 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
289 device->meta_state.buffer.fill_ds_layout,
290 &device->meta_state.alloc);
291 }
292
293 static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
294 struct radeon_winsys_bo *bo,
295 uint64_t offset, uint64_t size, uint32_t value)
296 {
297 struct radv_device *device = cmd_buffer->device;
298 uint64_t block_count = round_up_u64(size, 1024);
299 struct radv_meta_saved_compute_state saved_state;
300 VkDescriptorSet ds;
301
302 radv_meta_save_compute(&saved_state, cmd_buffer, 4);
303
304 radv_temp_descriptor_set_create(device, cmd_buffer,
305 device->meta_state.buffer.fill_ds_layout,
306 &ds);
307
308 struct radv_buffer dst_buffer = {
309 .bo = bo,
310 .offset = offset,
311 .size = size
312 };
313
314 radv_UpdateDescriptorSets(radv_device_to_handle(device),
315 1, /* writeCount */
316 (VkWriteDescriptorSet[]) {
317 {
318 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
319 .dstSet = ds,
320 .dstBinding = 0,
321 .dstArrayElement = 0,
322 .descriptorCount = 1,
323 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
324 .pBufferInfo = &(VkDescriptorBufferInfo) {
325 .buffer = radv_buffer_to_handle(&dst_buffer),
326 .offset = 0,
327 .range = size
328 }
329 }
330 }, 0, NULL);
331
332 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
333 VK_PIPELINE_BIND_POINT_COMPUTE,
334 device->meta_state.buffer.fill_pipeline);
335
336 radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
337 VK_PIPELINE_BIND_POINT_COMPUTE,
338 device->meta_state.buffer.fill_p_layout, 0, 1,
339 &ds, 0, NULL);
340
341 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
342 device->meta_state.buffer.fill_p_layout,
343 VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
344 &value);
345
346 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
347
348 radv_temp_descriptor_set_destroy(device, ds);
349
350 radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
351 }
352
353 static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
354 struct radeon_winsys_bo *src_bo,
355 struct radeon_winsys_bo *dst_bo,
356 uint64_t src_offset, uint64_t dst_offset,
357 uint64_t size)
358 {
359 struct radv_device *device = cmd_buffer->device;
360 uint64_t block_count = round_up_u64(size, 1024);
361 struct radv_meta_saved_compute_state saved_state;
362 VkDescriptorSet ds;
363
364 radv_meta_save_compute(&saved_state, cmd_buffer, 0);
365
366 radv_temp_descriptor_set_create(device, cmd_buffer,
367 device->meta_state.buffer.copy_ds_layout,
368 &ds);
369
370 struct radv_buffer dst_buffer = {
371 .bo = dst_bo,
372 .offset = dst_offset,
373 .size = size
374 };
375
376 struct radv_buffer src_buffer = {
377 .bo = src_bo,
378 .offset = src_offset,
379 .size = size
380 };
381
382 radv_UpdateDescriptorSets(radv_device_to_handle(device),
383 2, /* writeCount */
384 (VkWriteDescriptorSet[]) {
385 {
386 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
387 .dstSet = ds,
388 .dstBinding = 0,
389 .dstArrayElement = 0,
390 .descriptorCount = 1,
391 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
392 .pBufferInfo = &(VkDescriptorBufferInfo) {
393 .buffer = radv_buffer_to_handle(&dst_buffer),
394 .offset = 0,
395 .range = size
396 }
397 },
398 {
399 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
400 .dstSet = ds,
401 .dstBinding = 1,
402 .dstArrayElement = 0,
403 .descriptorCount = 1,
404 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
405 .pBufferInfo = &(VkDescriptorBufferInfo) {
406 .buffer = radv_buffer_to_handle(&src_buffer),
407 .offset = 0,
408 .range = size
409 }
410 }
411 }, 0, NULL);
412
413 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
414 VK_PIPELINE_BIND_POINT_COMPUTE,
415 device->meta_state.buffer.copy_pipeline);
416
417 radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
418 VK_PIPELINE_BIND_POINT_COMPUTE,
419 device->meta_state.buffer.copy_p_layout, 0, 1,
420 &ds, 0, NULL);
421
422
423 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
424
425 radv_temp_descriptor_set_destroy(device, ds);
426
427 radv_meta_restore_compute(&saved_state, cmd_buffer, 0);
428 }
429
430
431 void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
432 struct radeon_winsys_bo *bo,
433 uint64_t offset, uint64_t size, uint32_t value)
434 {
435 assert(!(offset & 3));
436 assert(!(size & 3));
437
438 if (size >= 4096)
439 fill_buffer_shader(cmd_buffer, bo, offset, size, value);
440 else if (size) {
441 uint64_t va = cmd_buffer->device->ws->buffer_get_va(bo);
442 va += offset;
443 cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, bo, 8);
444 si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
445 }
446 }
447
448 static
449 void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
450 struct radeon_winsys_bo *src_bo,
451 struct radeon_winsys_bo *dst_bo,
452 uint64_t src_offset, uint64_t dst_offset,
453 uint64_t size)
454 {
455 if (size >= 4096 && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
456 copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
457 src_offset, dst_offset, size);
458 else if (size) {
459 uint64_t src_va = cmd_buffer->device->ws->buffer_get_va(src_bo);
460 uint64_t dst_va = cmd_buffer->device->ws->buffer_get_va(dst_bo);
461 src_va += src_offset;
462 dst_va += dst_offset;
463
464 cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, src_bo, 8);
465 cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_bo, 8);
466
467 si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
468 }
469 }
470
471 void radv_CmdFillBuffer(
472 VkCommandBuffer commandBuffer,
473 VkBuffer dstBuffer,
474 VkDeviceSize dstOffset,
475 VkDeviceSize fillSize,
476 uint32_t data)
477 {
478 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
479 RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
480
481 if (fillSize == VK_WHOLE_SIZE)
482 fillSize = (dst_buffer->size - dstOffset) & ~3ull;
483
484 radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
485 fillSize, data);
486 }
487
488 void radv_CmdCopyBuffer(
489 VkCommandBuffer commandBuffer,
490 VkBuffer srcBuffer,
491 VkBuffer destBuffer,
492 uint32_t regionCount,
493 const VkBufferCopy* pRegions)
494 {
495 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
496 RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
497 RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);
498
499 for (unsigned r = 0; r < regionCount; r++) {
500 uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
501 uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
502 uint64_t copy_size = pRegions[r].size;
503
504 radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
505 src_offset, dest_offset, copy_size);
506 }
507 }
508
509 void radv_CmdUpdateBuffer(
510 VkCommandBuffer commandBuffer,
511 VkBuffer dstBuffer,
512 VkDeviceSize dstOffset,
513 VkDeviceSize dataSize,
514 const uint32_t* pData)
515 {
516 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
517 RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
518 uint64_t words = dataSize / 4;
519 uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
520 va += dstOffset + dst_buffer->offset;
521
522 assert(!(dataSize & 3));
523 assert(!(va & 3));
524
525 if (dataSize < 4096) {
526 cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);
527
528 radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
529
530 radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
531 radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
532 S_370_WR_CONFIRM(1) |
533 S_370_ENGINE_SEL(V_370_ME));
534 radeon_emit(cmd_buffer->cs, va);
535 radeon_emit(cmd_buffer->cs, va >> 32);
536 radeon_emit_array(cmd_buffer->cs, pData, words);
537 } else {
538 uint32_t buf_offset;
539 radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
540 radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
541 buf_offset, dstOffset + dst_buffer->offset, dataSize);
542 }
543 }