unsigned export_count;
ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
+ si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
export_count = MAX2(1, outinfo->param_exports);
radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG,
uint64_t va = ws->buffer_get_va(shader->bo);
ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
+ si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
outinfo->esgs_itemsize / 4);
uint32_t rsrc2 = shader->rsrc2;
ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
+ si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
radeon_emit(cmd_buffer->cs, va >> 8);
uint64_t va = ws->buffer_get_va(shader->bo);
ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
+ si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
va = ws->buffer_get_va(gs->bo);
ws->cs_add_buffer(cmd_buffer->cs, gs->bo, 8);
+ si_cp_dma_prefetch(cmd_buffer, va, gs->code_size);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, va >> 40);
va = ws->buffer_get_va(ps->bo);
ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
+ si_cp_dma_prefetch(cmd_buffer, va, ps->code_size);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
uint64_t src_va, uint64_t dest_va,
uint64_t size);
+void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
+ unsigned size);
void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
uint64_t size, unsigned value);
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
radv_cmd_buffer_trace_emit(cmd_buffer);
}
+void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
+ unsigned size)
+{
+ uint64_t aligned_va = va & ~(CP_DMA_ALIGNMENT - 1);
+ uint64_t aligned_size = ((va + size + CP_DMA_ALIGNMENT -1) & ~(CP_DMA_ALIGNMENT - 1)) - aligned_va;
+
+ si_emit_cp_dma_copy_buffer(cmd_buffer, aligned_va, aligned_va,
+ aligned_size, CIK_CP_DMA_USE_L2);
+}
+
static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
uint64_t remaining_size, unsigned *flags)
{