/* mesa.git: src/amd/vulkan/radv_meta_buffer.c */
#include "radv_meta.h"
#include "nir/nir_builder.h"

#include "sid.h"
#include "radv_cs.h"

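/* Builds a compute shader that fills a buffer with a 32-bit value taken
 * from the push constants. Each of the 64 invocations in a workgroup
 * stores one vec4 (16 bytes), so a single workgroup covers 1024 bytes.
 */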
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
	b.shader->info.cs.local_size[0] = 64;
	b.shader->info.cs.local_size[1] = 1;
	b.shader->info.cs.local_size[2] = 1;

	/* Compute the global invocation ID and scale it to a 16-byte offset. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_channel(&b, offset, 0);

	/* The destination buffer lives at set 0, binding 0. */
	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
								  nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	dst_buf->num_components = 1;
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, dst_buf->num_components, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	/* Load the 32-bit fill value from the push constants. */
	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(load, 0);
	nir_intrinsic_set_range(load, 4);
	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	load->num_components = 1;
	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
	nir_builder_instr_insert(&b, &load->instr);

	/* Broadcast the value to all four components and store one vec4. */
	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(swizzled_load);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}

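/* Builds a compute shader that copies between two storage buffers.
 * As with the fill shader, each invocation moves one vec4 (16 bytes),
 * so a 64-lane workgroup covers 1024 bytes.
 */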
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
	b.shader->info.cs.local_size[0] = 64;
	b.shader->info.cs.local_size[1] = 1;
	b.shader->info.cs.local_size[2] = 1;

	/* Compute the global invocation ID and scale it to a 16-byte offset. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_channel(&b, offset, 0);

	/* Destination buffer: set 0, binding 0. */
	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
								  nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	dst_buf->num_components = 1;
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, dst_buf->num_components, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	/* Source buffer: set 0, binding 1. */
	nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
								  nir_intrinsic_vulkan_resource_index);
	src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_buf->num_components = 1;
	nir_intrinsic_set_desc_set(src_buf, 0);
	nir_intrinsic_set_binding(src_buf, 1);
	nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, src_buf->num_components, 32, NULL);
	nir_builder_instr_insert(&b, &src_buf->instr);

	/* Load one vec4 from the source and store it to the destination. */
	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
	load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
	load->src[1] = nir_src_for_ssa(offset);
	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
	load->num_components = 4;
	nir_builder_instr_insert(&b, &load->instr);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}

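/* Creates the descriptor set layouts, pipeline layouts and compute
 * pipelines used by the buffer fill and copy meta operations. On
 * failure, everything created so far is destroyed again through
 * radv_device_finish_meta_buffer_state().
 */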
VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module fill_cs = { .nir = NULL };
	struct radv_shader_module copy_cs = { .nir = NULL };

	fill_cs.nir = build_buffer_fill_shader(device);
	copy_cs.nir = build_buffer_copy_shader(device);

	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&fill_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.fill_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&copy_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.copy_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* The fill shader takes the fill value as a single 32-bit push constant. */
	VkPipelineLayoutCreateInfo fill_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &fill_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.fill_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineLayoutCreateInfo copy_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
		.pushConstantRangeCount = 0,
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &copy_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.copy_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&fill_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = fill_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.fill_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &fill_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.fill_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&copy_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = copy_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.copy_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &copy_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.copy_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	/* The NIR is no longer needed once the pipelines have been created. */
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return VK_SUCCESS;
fail:
	radv_device_finish_meta_buffer_state(device);
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return result;
}

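/* Tears down the buffer meta state. The radv_Destroy* entry points
 * accept VK_NULL_HANDLE, so this is also safe to call on a partially
 * initialized state, as the failure path of the init function does.
 */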
void radv_device_finish_meta_buffer_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;

	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->buffer.copy_pipeline, &state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->buffer.fill_pipeline, &state->alloc);
	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->buffer.copy_p_layout, &state->alloc);
	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->buffer.fill_p_layout, &state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->buffer.copy_ds_layout,
					&state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->buffer.fill_ds_layout,
					&state->alloc);
}

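/* Fills a buffer range using the compute pipeline: save the current
 * compute state, bind the fill pipeline, push the destination buffer
 * as a push descriptor and the fill value as a push constant, then
 * dispatch one workgroup per 1024 bytes.
 */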
static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *bo,
			       uint64_t offset, uint64_t size, uint32_t value)
{
	struct radv_device *device = cmd_buffer->device;
	/* Each 64-lane workgroup fills 16 bytes per invocation, i.e. 1024 bytes. */
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_state saved_state;

	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_COMPUTE_PIPELINE |
		       RADV_META_SAVE_CONSTANTS |
		       RADV_META_SAVE_DESCRIPTORS);

	struct radv_buffer dst_buffer = {
		.bo = bo,
		.offset = offset,
		.size = size
	};

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.fill_pipeline);

	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.buffer.fill_p_layout,
				      0, /* set */
				      1, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						      .pBufferInfo = &(VkDescriptorBufferInfo) {
							      .buffer = radv_buffer_to_handle(&dst_buffer),
							      .offset = 0,
							      .range = size
						      }
					      }
				      });

	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
			      device->meta_state.buffer.fill_p_layout,
			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
			      &value);

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_meta_restore(&saved_state, cmd_buffer);
}

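/* Copies between two buffer ranges using the compute pipeline, with
 * the destination at binding 0 and the source at binding 1, again
 * dispatching one workgroup per 1024 bytes.
 */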
static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *src_bo,
			       struct radeon_winsys_bo *dst_bo,
			       uint64_t src_offset, uint64_t dst_offset,
			       uint64_t size)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_state saved_state;

	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_COMPUTE_PIPELINE |
		       RADV_META_SAVE_DESCRIPTORS);

	struct radv_buffer dst_buffer = {
		.bo = dst_bo,
		.offset = dst_offset,
		.size = size
	};

	struct radv_buffer src_buffer = {
		.bo = src_bo,
		.offset = src_offset,
		.size = size
	};

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.copy_pipeline);

	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.buffer.copy_p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						      .pBufferInfo = &(VkDescriptorBufferInfo) {
							      .buffer = radv_buffer_to_handle(&dst_buffer),
							      .offset = 0,
							      .range = size
						      }
					      },
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 1,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						      .pBufferInfo = &(VkDescriptorBufferInfo) {
							      .buffer = radv_buffer_to_handle(&src_buffer),
							      .offset = 0,
							      .range = size
						      }
					      }
				      });

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_meta_restore(&saved_state, cmd_buffer);
}

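/* Fills a buffer range with a 32-bit value. Sufficiently large fills
 * go through the compute path; smaller (but non-empty) ones use CP DMA.
 * Returns the flush bits the caller must apply before the written data
 * can be consumed.
 */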
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
			  struct radeon_winsys_bo *bo,
			  uint64_t offset, uint64_t size, uint32_t value)
{
	uint32_t flush_bits = 0;

	assert(!(offset & 3));
	assert(!(size & 3));

	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
		flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
			     RADV_CMD_FLAG_INV_VCACHE |
			     RADV_CMD_FLAG_WB_L2;
	} else if (size) {
		uint64_t va = radv_buffer_get_va(bo);
		va += offset;
		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
	}

	return flush_bits;
}

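/* Copies between two buffer ranges, using the compute path for large
 * dword-aligned copies and CP DMA otherwise.
 */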
static
void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
		      struct radeon_winsys_bo *src_bo,
		      struct radeon_winsys_bo *dst_bo,
		      uint64_t src_offset, uint64_t dst_offset,
		      uint64_t size)
{
	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
				   src_offset, dst_offset, size);
	else if (size) {
		uint64_t src_va = radv_buffer_get_va(src_bo);
		uint64_t dst_va = radv_buffer_get_va(dst_bo);
		src_va += src_offset;
		dst_va += dst_offset;

		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);

		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
	}
}

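/* vkCmdFillBuffer: VK_WHOLE_SIZE means "to the end of the buffer",
 * rounded down to a multiple of 4 as the spec requires.
 */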
void radv_CmdFillBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                fillSize,
	uint32_t                                    data)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);

	if (fillSize == VK_WHOLE_SIZE)
		fillSize = (dst_buffer->size - dstOffset) & ~3ull;

	radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
			 fillSize, data);
}

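/* vkCmdCopyBuffer: copies each region in turn via radv_copy_buffer(). */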
void radv_CmdCopyBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    srcBuffer,
	VkBuffer                                    destBuffer,
	uint32_t                                    regionCount,
	const VkBufferCopy*                         pRegions)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
	RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);
	bool old_predicating;

	/* VK_EXT_conditional_rendering says that copy commands should not be
	 * affected by conditional rendering.
	 */
	old_predicating = cmd_buffer->state.predicating;
	cmd_buffer->state.predicating = false;

	for (unsigned r = 0; r < regionCount; r++) {
		uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
		uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
		uint64_t copy_size = pRegions[r].size;

		radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
				 src_offset, dest_offset, copy_size);
	}

	/* Restore conditional rendering. */
	cmd_buffer->state.predicating = old_predicating;
}

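/* vkCmdUpdateBuffer: small updates are written inline into the command
 * stream with a WRITE_DATA packet; larger ones are staged through the
 * upload buffer and copied from there.
 */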
void radv_CmdUpdateBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                dataSize,
	const void*                                 pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	uint64_t words = dataSize / 4;
	uint64_t va = radv_buffer_get_va(dst_buffer->bo);
	va += dstOffset + dst_buffer->offset;

	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (!dataSize)
		return;

	if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
		si_emit_cache_flush(cmd_buffer);

		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);

		/* 4 extra dwords: PKT3 header, DST_SEL dword and the 64-bit address. */
		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
							  V_370_MEM : V_370_MEM_GRBM) |
					    S_370_WR_CONFIRM(1) |
					    S_370_ENGINE_SEL(V_370_ME));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit_array(cmd_buffer->cs, pData, words);

		if (unlikely(cmd_buffer->device->trace_bo))
			radv_cmd_buffer_trace_emit(cmd_buffer);
	} else {
		/* Stage the data in the upload buffer and do a buffer copy. */
		uint32_t buf_offset;
		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
				 buf_offset, dstOffset + dst_buffer->offset, dataSize);
	}
}