nir: Add new system values and intrinsics for dealing with CL work offsets
[mesa.git] / src / amd / vulkan / radv_meta_buffer.c
1 #include "radv_meta.h"
2 #include "nir/nir_builder.h"
3
4 #include "sid.h"
5 #include "radv_cs.h"
6
/* Build a compute shader that fills a storage buffer with a 32-bit value
 * taken from the first 4 bytes of the push constants.  Each invocation
 * writes one 16-byte (vec4) chunk; with a 64x1x1 workgroup one group
 * covers 1024 bytes.  The destination SSBO is descriptor set 0, binding 0.
 */
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
	b.shader->info.cs.local_size[0] = 64;
	b.shader->info.cs.local_size[1] = 1;
	b.shader->info.cs.local_size[2] = 1;

	/* global_id = workgroup_id * workgroup_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Byte offset of this invocation's vec4; only the x component is
	 * meaningful since the dispatch is one-dimensional. */
	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_channel(&b, offset, 0);

	/* Destination buffer descriptor: set 0, binding 0, array index 0. */
	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	dst_buf->num_components = 1;
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, dst_buf->num_components, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	/* Load the 32-bit fill value from push constant offset 0. */
	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(load, 0);
	nir_intrinsic_set_range(load, 4);
	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	load->num_components = 1;
	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
	nir_builder_instr_insert(&b, &load->instr);

	/* Broadcast the scalar fill value to all four vec4 components. */
	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4);

	/* Store the vec4 to the destination; the buffer is write-only from
	 * this shader's point of view, hence ACCESS_NON_READABLE. */
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(swizzled_load);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
	nir_intrinsic_set_align(store, 16, 0);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}
61
62 static nir_shader *
63 build_buffer_copy_shader(struct radv_device *dev)
64 {
65 nir_builder b;
66
67 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
68 b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
69 b.shader->info.cs.local_size[0] = 64;
70 b.shader->info.cs.local_size[1] = 1;
71 b.shader->info.cs.local_size[2] = 1;
72
73 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
74 nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
75 nir_ssa_def *block_size = nir_imm_ivec4(&b,
76 b.shader->info.cs.local_size[0],
77 b.shader->info.cs.local_size[1],
78 b.shader->info.cs.local_size[2], 0);
79
80 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
81
82 nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
83 offset = nir_channel(&b, offset, 0);
84
85 nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
86 nir_intrinsic_vulkan_resource_index);
87 dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
88 dst_buf->num_components = 1;
89 nir_intrinsic_set_desc_set(dst_buf, 0);
90 nir_intrinsic_set_binding(dst_buf, 0);
91 nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, dst_buf->num_components, 32, NULL);
92 nir_builder_instr_insert(&b, &dst_buf->instr);
93
94 nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
95 nir_intrinsic_vulkan_resource_index);
96 src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
97 src_buf->num_components = 1;
98 nir_intrinsic_set_desc_set(src_buf, 0);
99 nir_intrinsic_set_binding(src_buf, 1);
100 nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, src_buf->num_components, 32, NULL);
101 nir_builder_instr_insert(&b, &src_buf->instr);
102
103 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
104 load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
105 load->src[1] = nir_src_for_ssa(offset);
106 nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
107 load->num_components = 4;
108 nir_intrinsic_set_align(load, 16, 0);
109 nir_builder_instr_insert(&b, &load->instr);
110
111 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
112 store->src[0] = nir_src_for_ssa(&load->dest.ssa);
113 store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
114 store->src[2] = nir_src_for_ssa(offset);
115 nir_intrinsic_set_write_mask(store, 0xf);
116 nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
117 nir_intrinsic_set_align(store, 16, 0);
118 store->num_components = 4;
119 nir_builder_instr_insert(&b, &store->instr);
120
121 return b.shader;
122 }
123
124
125
/* Create the descriptor set layouts, pipeline layouts and compute
 * pipelines used by the meta buffer fill/copy paths.  On failure,
 * partially created objects are torn down via
 * radv_device_finish_meta_buffer_state().  The temporary NIR shaders
 * are freed on both the success and failure paths.
 */
VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module fill_cs = { .nir = NULL };
	struct radv_shader_module copy_cs = { .nir = NULL };

	fill_cs.nir = build_buffer_fill_shader(device);
	copy_cs.nir = build_buffer_copy_shader(device);

	/* Fill: a single SSBO (the destination), bound as a push descriptor. */
	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&fill_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.fill_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* Copy: destination SSBO at binding 0, source SSBO at binding 1
	 * (matching build_buffer_copy_shader). */
	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&copy_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.copy_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* Fill pipeline layout: one 4-byte push constant for the fill value. */
	VkPipelineLayoutCreateInfo fill_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &fill_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.fill_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* Copy pipeline layout: no push constants needed. */
	VkPipelineLayoutCreateInfo copy_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
		.pushConstantRangeCount = 0,
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &copy_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.copy_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* Compile the fill compute pipeline from the NIR shader above. */
	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&fill_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = fill_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.fill_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &fill_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.fill_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	/* Compile the copy compute pipeline. */
	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&copy_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = copy_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.copy_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &copy_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.copy_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	/* The NIR is owned by the pipelines now; free the temporary modules. */
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return VK_SUCCESS;
fail:
	radv_device_finish_meta_buffer_state(device);
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return result;
}
269
270 void radv_device_finish_meta_buffer_state(struct radv_device *device)
271 {
272 struct radv_meta_state *state = &device->meta_state;
273
274 radv_DestroyPipeline(radv_device_to_handle(device),
275 state->buffer.copy_pipeline, &state->alloc);
276 radv_DestroyPipeline(radv_device_to_handle(device),
277 state->buffer.fill_pipeline, &state->alloc);
278 radv_DestroyPipelineLayout(radv_device_to_handle(device),
279 state->buffer.copy_p_layout, &state->alloc);
280 radv_DestroyPipelineLayout(radv_device_to_handle(device),
281 state->buffer.fill_p_layout, &state->alloc);
282 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
283 state->buffer.copy_ds_layout,
284 &state->alloc);
285 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
286 state->buffer.fill_ds_layout,
287 &state->alloc);
288 }
289
/* Fill [offset, offset + size) of 'bo' with 'value' using the meta fill
 * compute pipeline.  One workgroup is dispatched per 1024 bytes (64
 * invocations x 16 bytes each).  The caller is responsible for the cache
 * flushes required afterwards (see radv_fill_buffer()). */
static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *bo,
			       uint64_t offset, uint64_t size, uint32_t value)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_state saved_state;

	/* Save the compute state the meta operation will clobber. */
	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_COMPUTE_PIPELINE |
		       RADV_META_SAVE_CONSTANTS |
		       RADV_META_SAVE_DESCRIPTORS);

	/* Wrap the raw BO range in a temporary radv_buffer so it can be
	 * bound through the normal descriptor path. */
	struct radv_buffer dst_buffer = {
		.bo = bo,
		.offset = offset,
		.size = size
	};

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.fill_pipeline);

	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.buffer.fill_p_layout,
				      0, /* set */
				      1, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						      .pBufferInfo = &(VkDescriptorBufferInfo) {
							      .buffer = radv_buffer_to_handle(&dst_buffer),
							      .offset = 0,
							      .range = size
						      }
					      }
				      });

	/* The fill value is passed as a single 32-bit push constant
	 * (matching the layout created in radv_device_init_meta_buffer_state). */
	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
			      device->meta_state.buffer.fill_p_layout,
			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
			      &value);

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_meta_restore(&saved_state, cmd_buffer);
}
341
342 static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
343 struct radeon_winsys_bo *src_bo,
344 struct radeon_winsys_bo *dst_bo,
345 uint64_t src_offset, uint64_t dst_offset,
346 uint64_t size)
347 {
348 struct radv_device *device = cmd_buffer->device;
349 uint64_t block_count = round_up_u64(size, 1024);
350 struct radv_meta_saved_state saved_state;
351
352 radv_meta_save(&saved_state, cmd_buffer,
353 RADV_META_SAVE_COMPUTE_PIPELINE |
354 RADV_META_SAVE_DESCRIPTORS);
355
356 struct radv_buffer dst_buffer = {
357 .bo = dst_bo,
358 .offset = dst_offset,
359 .size = size
360 };
361
362 struct radv_buffer src_buffer = {
363 .bo = src_bo,
364 .offset = src_offset,
365 .size = size
366 };
367
368 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
369 VK_PIPELINE_BIND_POINT_COMPUTE,
370 device->meta_state.buffer.copy_pipeline);
371
372 radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
373 device->meta_state.buffer.copy_p_layout,
374 0, /* set */
375 2, /* descriptorWriteCount */
376 (VkWriteDescriptorSet[]) {
377 {
378 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
379 .dstBinding = 0,
380 .dstArrayElement = 0,
381 .descriptorCount = 1,
382 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
383 .pBufferInfo = &(VkDescriptorBufferInfo) {
384 .buffer = radv_buffer_to_handle(&dst_buffer),
385 .offset = 0,
386 .range = size
387 }
388 },
389 {
390 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
391 .dstBinding = 1,
392 .dstArrayElement = 0,
393 .descriptorCount = 1,
394 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
395 .pBufferInfo = &(VkDescriptorBufferInfo) {
396 .buffer = radv_buffer_to_handle(&src_buffer),
397 .offset = 0,
398 .range = size
399 }
400 }
401 });
402
403 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
404
405 radv_meta_restore(&saved_state, cmd_buffer);
406 }
407
408
409 uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
410 struct radeon_winsys_bo *bo,
411 uint64_t offset, uint64_t size, uint32_t value)
412 {
413 uint32_t flush_bits = 0;
414
415 assert(!(offset & 3));
416 assert(!(size & 3));
417
418 if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
419 fill_buffer_shader(cmd_buffer, bo, offset, size, value);
420 flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
421 RADV_CMD_FLAG_INV_VCACHE |
422 RADV_CMD_FLAG_WB_L2;
423 } else if (size) {
424 uint64_t va = radv_buffer_get_va(bo);
425 va += offset;
426 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
427 si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
428 }
429
430 return flush_bits;
431 }
432
433 static
434 void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
435 struct radeon_winsys_bo *src_bo,
436 struct radeon_winsys_bo *dst_bo,
437 uint64_t src_offset, uint64_t dst_offset,
438 uint64_t size)
439 {
440 if (size >= RADV_BUFFER_OPS_CS_THRESHOLD && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
441 copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
442 src_offset, dst_offset, size);
443 else if (size) {
444 uint64_t src_va = radv_buffer_get_va(src_bo);
445 uint64_t dst_va = radv_buffer_get_va(dst_bo);
446 src_va += src_offset;
447 dst_va += dst_offset;
448
449 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
450 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);
451
452 si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
453 }
454 }
455
456 void radv_CmdFillBuffer(
457 VkCommandBuffer commandBuffer,
458 VkBuffer dstBuffer,
459 VkDeviceSize dstOffset,
460 VkDeviceSize fillSize,
461 uint32_t data)
462 {
463 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
464 RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
465
466 if (fillSize == VK_WHOLE_SIZE)
467 fillSize = (dst_buffer->size - dstOffset) & ~3ull;
468
469 radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
470 fillSize, data);
471 }
472
473 void radv_CmdCopyBuffer(
474 VkCommandBuffer commandBuffer,
475 VkBuffer srcBuffer,
476 VkBuffer destBuffer,
477 uint32_t regionCount,
478 const VkBufferCopy* pRegions)
479 {
480 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
481 RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
482 RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);
483 bool old_predicating;
484
485 /* VK_EXT_conditional_rendering says that copy commands should not be
486 * affected by conditional rendering.
487 */
488 old_predicating = cmd_buffer->state.predicating;
489 cmd_buffer->state.predicating = false;
490
491 for (unsigned r = 0; r < regionCount; r++) {
492 uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
493 uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
494 uint64_t copy_size = pRegions[r].size;
495
496 radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
497 src_offset, dest_offset, copy_size);
498 }
499
500 /* Restore conditional rendering. */
501 cmd_buffer->state.predicating = old_predicating;
502 }
503
/* vkCmdUpdateBuffer: write dataSize bytes from pData into dst_buffer at
 * dstOffset.  Small updates are emitted inline into the command stream
 * with a WRITE_DATA packet; larger ones are staged through the upload BO
 * and copied with radv_copy_buffer(). */
void radv_CmdUpdateBuffer(
	VkCommandBuffer commandBuffer,
	VkBuffer dstBuffer,
	VkDeviceSize dstOffset,
	VkDeviceSize dataSize,
	const void* pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	uint64_t words = dataSize / 4;
	uint64_t va = radv_buffer_get_va(dst_buffer->bo);
	va += dstOffset + dst_buffer->offset;

	/* Offset and size must be dword-aligned. */
	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (!dataSize)
		return;

	if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
		si_emit_cache_flush(cmd_buffer);

		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);

		/* Packet header + control dword + 2 address dwords + payload. */
		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
		/* On the compute (MEC) ring the GRBM destination select is not
		 * available; plain memory writes are used instead. */
		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
				V_370_MEM : V_370_MEM_GRBM) |
			    S_370_WR_CONFIRM(1) |
			    S_370_ENGINE_SEL(V_370_ME));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit_array(cmd_buffer->cs, pData, words);

		if (unlikely(cmd_buffer->device->trace_bo))
			radv_cmd_buffer_trace_emit(cmd_buffer);
	} else {
		/* Stage the data in the upload BO (32-byte aligned) and do a
		 * GPU-side copy into the destination. */
		uint32_t buf_offset;
		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
				 buf_offset, dstOffset + dst_buffer->offset, dataSize);
	}
}