cmd_buffer->sample_positions_needed = false;
if (cmd_buffer->upload.upload_bo)
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
- cmd_buffer->upload.upload_bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+ cmd_buffer->upload.upload_bo, 8);
cmd_buffer->upload.offset = 0;
cmd_buffer->record_result = VK_SUCCESS;
return false;
}
- device->ws->cs_add_buffer(cmd_buffer->cs, bo, 8);
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo, 8);
if (cmd_buffer->upload.upload_bo) {
upload = malloc(sizeof(*upload));
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);
++cmd_buffer->state.trace_id;
- device->ws->cs_add_buffer(cs, device->trace_bo, 8);
+ radv_cs_add_buffer(device->ws, cs, device->trace_bo, 8);
radv_emit_write_data_packet(cs, va, 1, &cmd_buffer->state.trace_id);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
data[0] = (uintptr_t)pipeline;
data[1] = (uintptr_t)pipeline >> 32;
- device->ws->cs_add_buffer(cs, device->trace_bo, 8);
+ radv_cs_add_buffer(device->ws, cs, device->trace_bo, 8);
radv_emit_write_data_packet(cs, va, 2, data);
}
data[i * 2 + 1] = (uintptr_t)set >> 32;
}
- device->ws->cs_add_buffer(cs, device->trace_bo, 8);
+ radv_cs_add_buffer(device->ws, cs, device->trace_bo, 8);
radv_emit_write_data_packet(cs, va, MAX_SETS * 2, data);
}
raster->pa_su_sc_mode_cntl);
}
+/*
+ * Request a prefetch of the range starting at address `va` and spanning
+ * `size` (presumably bytes -- TODO confirm against si_cp_dma_prefetch()).
+ *
+ * The request is forwarded to si_cp_dma_prefetch() only when the device's
+ * chip_class is CIK or newer; on older chips this helper does nothing.
+ */
+static inline void
+radv_emit_prefetch_TC_L2_async(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
+ unsigned size)
+{
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
+ si_cp_dma_prefetch(cmd_buffer, va, size);
+}
+
+/*
+ * Prefetch the range recorded in state.vb_va / state.vb_size, but only if
+ * state.vb_prefetch_dirty is set. The flag is cleared afterwards, so the
+ * same range is not prefetched again until it is marked dirty once more.
+ */
+static void
+radv_emit_VBO_descriptors_prefetch(struct radv_cmd_buffer *cmd_buffer)
+{
+ if (cmd_buffer->state.vb_prefetch_dirty) {
+ radv_emit_prefetch_TC_L2_async(cmd_buffer,
+ cmd_buffer->state.vb_va,
+ cmd_buffer->state.vb_size);
+ cmd_buffer->state.vb_prefetch_dirty = false;
+ }
+}
+
static void
radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer,
struct radv_shader_variant *shader)
va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- ws->cs_add_buffer(cs, shader->bo, 8);
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
- si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
+ radv_cs_add_buffer(ws, cs, shader->bo, 8);
+ radv_emit_prefetch_TC_L2_async(cmd_buffer, va, shader->code_size);
}
static void
-radv_emit_shaders_prefetch(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+radv_emit_prefetch(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline)
{
radv_emit_shader_prefetch(cmd_buffer,
pipeline->shaders[MESA_SHADER_VERTEX]);
+ radv_emit_VBO_descriptors_prefetch(cmd_buffer);
radv_emit_shader_prefetch(cmd_buffer,
pipeline->shaders[MESA_SHADER_TESS_CTRL]);
radv_emit_shader_prefetch(cmd_buffer,
static void
radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader,
- struct ac_vs_output_info *outinfo)
+ struct radv_shader_variant *shader)
{
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- unsigned export_count;
- export_count = MAX2(1, outinfo->param_exports);
radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG,
- S_0286C4_VS_EXPORT_COUNT(export_count - 1));
+ pipeline->graphics.vs.spi_vs_out_config);
radeon_set_context_reg(cmd_buffer->cs, R_02870C_SPI_SHADER_POS_FORMAT,
- S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE));
-
+ pipeline->graphics.vs.spi_shader_pos_format);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL,
- pipeline->graphics.pa_cl_vs_out_cntl);
+ pipeline->graphics.vs.pa_cl_vs_out_cntl);
if (cmd_buffer->device->physical_device->rad_info.chip_class <= VI)
radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF,
- S_028AB4_REUSE_OFF(outinfo->writes_viewport_index));
+ pipeline->graphics.vs.vgt_reuse_off);
}
static void
radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
- struct radv_shader_variant *shader,
- struct ac_es_output_info *outinfo)
+ struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader)
{
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- outinfo->esgs_itemsize / 4);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, va >> 40);
if (vs->info.vs.as_ls)
radv_emit_hw_ls(cmd_buffer, vs);
else if (vs->info.vs.as_es)
- radv_emit_hw_es(cmd_buffer, vs, &vs->info.vs.es_info);
+ radv_emit_hw_es(cmd_buffer, pipeline, vs);
else
- radv_emit_hw_vs(cmd_buffer, pipeline, vs, &vs->info.vs.outinfo);
+ radv_emit_hw_vs(cmd_buffer, pipeline, vs);
}
if (tes) {
if (tes->info.tes.as_es)
- radv_emit_hw_es(cmd_buffer, tes, &tes->info.tes.es_info);
+ radv_emit_hw_es(cmd_buffer, pipeline, tes);
else
- radv_emit_hw_vs(cmd_buffer, pipeline, tes, &tes->info.tes.outinfo);
+ radv_emit_hw_vs(cmd_buffer, pipeline, tes);
}
radv_emit_hw_hs(cmd_buffer, tcs);
S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
S_028B90_ENABLE(gs_num_invocations > 0));
+ radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+ pipeline->graphics.gs.vgt_esgs_ring_itemsize);
+
va = radv_buffer_get_va(gs->bo) + gs->bo_offset;
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
radeon_set_context_reg(cmd_buffer->cs, R_028A44_VGT_GS_ONCHIP_CNTL, pipeline->graphics.gs.vgt_gs_onchip_cntl);
radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup);
- radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, pipeline->graphics.gs.vgt_esgs_ring_itemsize);
} else {
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, gs->rsrc2);
}
- radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader, &pipeline->gs_copy_shader->info.vs.outinfo);
+ radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader);
struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
AC_UD_GS_VS_RING_STRIDE_ENTRIES);
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
++reg_count;
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, image->bo, 8);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0));
radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
if (!image->surface.htile_size)
return;
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
if (!image->surface.dcc_size)
return;
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, image->bo, 8);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
if (!image->cmask.size && !image->surface.dcc_size)
return;
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, image->bo, 8);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
return;
uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + idx * 0x3c;
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
int idx = subpass->color_attachments[i].attachment;
struct radv_attachment_info *att = &framebuffer->attachments[idx];
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, att->attachment->bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo, 8);
assert(att->attachment->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT);
radv_emit_fb_color_state(cmd_buffer, i, &att->cb);
VkImageLayout layout = subpass->depth_stencil_attachment.layout;
struct radv_attachment_info *att = &framebuffer->attachments[idx];
struct radv_image *image = att->attachment->image;
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, att->attachment->bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo, 8);
MAYBE_UNUSED uint32_t queue_mask = radv_image_queue_family_mask(image,
cmd_buffer->queue_family_index,
cmd_buffer->queue_family_index);
struct radv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
uint32_t stride = cmd_buffer->state.pipeline->binding_stride[vb];
- device->ws->cs_add_buffer(cmd_buffer->cs, buffer->bo, 8);
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo, 8);
va = radv_buffer_get_va(buffer->bo);
offset = cmd_buffer->vertex_bindings[vb].offset + velems->offset[i];
radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
AC_UD_VS_VERTEX_BUFFERS, va);
+
+ cmd_buffer->state.vb_va = va;
+ cmd_buffer->state.vb_size = count * 16;
+ cmd_buffer->state.vb_prefetch_dirty = true;
}
cmd_buffer->state.vb_dirty = false;
struct radv_device *device = cmd_buffer->device;
if (device->gfx_init) {
uint64_t va = radv_buffer_get_va(device->gfx_init);
- device->ws->cs_add_buffer(cmd_buffer->cs, device->gfx_init, 8);
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->gfx_init, 8);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
int index_size_shift = cmd_buffer->state.index_type ? 2 : 1;
cmd_buffer->state.max_index_count = (index_buffer->size - offset) >> index_size_shift;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, index_buffer->bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo, 8);
}
for (unsigned j = 0; j < set->layout->buffer_count; ++j)
if (set->descriptors[j])
- ws->cs_add_buffer(cmd_buffer->cs, set->descriptors[j], 7);
+ radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j], 7);
if(set->bo)
- ws->cs_add_buffer(cmd_buffer->cs, set->bo, 8);
+ radv_cs_add_buffer(ws, cmd_buffer->cs, set->bo, 8);
}
void radv_CmdBindDescriptorSets(
va += info->indirect->offset + info->indirect_offset;
- ws->cs_add_buffer(cs, info->indirect->bo, 8);
+ radv_cs_add_buffer(ws, cs, info->indirect->bo, 8);
radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
radeon_emit(cs, 1);
count_va += info->count_buffer->offset +
info->count_buffer_offset;
- ws->cs_add_buffer(cs, info->count_buffer->bo, 8);
+ radv_cs_add_buffer(ws, cs, info->count_buffer->bo, 8);
}
if (!state->subpass->view_mask) {
* important.
*/
if (pipeline_is_dirty) {
- radv_emit_shaders_prefetch(cmd_buffer,
- cmd_buffer->state.pipeline);
+ radv_emit_prefetch(cmd_buffer,
+ cmd_buffer->state.pipeline);
}
} else {
/* If we don't wait for idle, start prefetches first, then set
si_emit_cache_flush(cmd_buffer);
if (pipeline_is_dirty) {
- radv_emit_shaders_prefetch(cmd_buffer,
- cmd_buffer->state.pipeline);
+ radv_emit_prefetch(cmd_buffer,
+ cmd_buffer->state.pipeline);
}
if (!radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty))
va += info->indirect->offset + info->indirect_offset;
- ws->cs_add_buffer(cs, info->indirect->bo, 8);
+ radv_cs_add_buffer(ws, cs, info->indirect->bo, 8);
if (loc->sgpr_idx != -1) {
for (unsigned i = 0; i < grid_used; ++i) {
struct radeon_winsys_cs *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(event->bo);
- cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo, 8);
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18);
RADV_FROM_HANDLE(radv_event, event, pEvents[i]);
uint64_t va = radv_buffer_get_va(event->bo);
- cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo, 8);
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);