unsigned dma_flags = 0;
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* DMA operations via L2 are coherent and faster.
+ * TODO: GFX7-GFX9 should also support this but it
+ * requires tests/benchmarks.
+ */
+ dma_flags |= CP_DMA_USE_L2;
+ }
+
si_cp_dma_prepare(cmd_buffer, byte_count,
size + skipped_size + realign_size,
&dma_flags);
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
unsigned dma_flags = CP_DMA_CLEAR;
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* DMA operations via L2 are coherent and faster.
+ * TODO: GFX7-GFX9 should also support this but it
+ * requires tests/benchmarks.
+ */
+ dma_flags |= CP_DMA_USE_L2;
+ }
+
si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags);
/* Emit the clear packet. */