#include "ac_debug.h"
-#include "addrlib/gfx9/chip/gfx9_enum.h"
-
enum {
RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0),
RADV_PREFETCH_VS = (1 << 1),
VkImageLayout dst_layout,
uint32_t src_family,
uint32_t dst_family,
- const VkImageSubresourceRange *range,
- VkImageAspectFlags pending_clears);
+ const VkImageSubresourceRange *range);
const struct radv_dynamic_state default_dynamic_state = {
.viewport = {
cmd_buffer->descriptors[i].push_dirty = false;
}
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+ cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends;
- unsigned eop_bug_offset;
+ unsigned fence_offset, eop_bug_offset;
void *fence_ptr;
- radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
- &cmd_buffer->gfx9_fence_offset,
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8, &fence_offset,
&fence_ptr);
- cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
+
+ cmd_buffer->gfx9_fence_va =
+ radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ cmd_buffer->gfx9_fence_va += fence_offset;
/* Allocate a buffer for the EOP bug on GFX9. */
- radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8,
&eop_bug_offset, &fence_ptr);
cmd_buffer->gfx9_eop_bug_va =
radv_buffer_get_va(cmd_buffer->upload.upload_bo);
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS|
RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_32BIT);
+ RADEON_FLAG_32BIT,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER);
if (!bo) {
cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
unsigned *out_offset,
void **ptr)
{
+ assert(util_is_power_of_two_nonzero(alignment));
+
uint64_t offset = align(cmd_buffer->upload.offset, alignment);
if (offset + size > cmd_buffer->upload.size) {
if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, va);
enum radv_cmd_flush_bits flags)
{
if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
- uint32_t *ptr = NULL;
- uint64_t va = 0;
-
assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH));
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- va = radv_buffer_get_va(cmd_buffer->gfx9_fence_bo) +
- cmd_buffer->gfx9_fence_offset;
- ptr = &cmd_buffer->gfx9_fence_idx;
- }
-
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
/* Force wait for graphics or compute engines to be idle. */
si_cs_emit_cache_flush(cmd_buffer->cs,
cmd_buffer->device->physical_device->rad_info.chip_class,
- ptr, va,
+ &cmd_buffer->gfx9_fence_idx,
+ cmd_buffer->gfx9_fence_va,
radv_cmd_buffer_uses_mec(cmd_buffer),
flags, cmd_buffer->gfx9_eop_bug_va);
}
if (loc->sgpr_idx == -1)
return;
- assert(loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2));
- assert(!loc->indirect);
+ assert(loc->num_sgprs == 1);
radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
base_reg + loc->sgpr_idx * 4, va, false);
struct radv_userdata_info *loc = &locs->descriptor_sets[start];
unsigned sh_offset = sh_base + loc->sgpr_idx * 4;
- radv_emit_shader_pointer_head(cs, sh_offset, count,
- HAVE_32BIT_POINTERS);
+ radv_emit_shader_pointer_head(cs, sh_offset, count, true);
for (int i = 0; i < count; i++) {
struct radv_descriptor_set *set =
descriptors_state->sets[start + i];
- radv_emit_shader_pointer_body(device, cs, set->va,
- HAVE_32BIT_POINTERS);
+ radv_emit_shader_pointer_body(device, cs, set->va, true);
}
}
}
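+/**
+ * Emit inline push constants directly into user SGPRs for the given
+ * shader stage, instead of loading them from the upload buffer.
+ */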
+static void
+radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline,
+ gl_shader_stage stage,
+ int idx, int count, uint32_t *values)
+{
+ struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
+ uint32_t base_reg = pipeline->user_data_0[stage];
+ if (loc->sgpr_idx == -1)
+ return;
+
+ assert(loc->num_sgprs == count);
+
+ radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count);
+ radeon_emit_array(cmd_buffer->cs, values, count);
+}
+
static void
radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
}
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
}
+ /* TODO: avoid redundantly setting context registers */
radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
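+ /* Only re-emit the context register part of the pipeline when it
+ * differs from the previously emitted one, to avoid useless
+ * context rolls.
+ */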
+ if (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw ||
+ cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash ||
+ memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf,
+ pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw * 4)) {
+ radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw);
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ }
+
for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
if (!pipeline->shaders[i])
continue;
cmd_buffer->state.dynamic.scissor.scissors,
cmd_buffer->state.dynamic.viewport.viewports,
cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = false;
}
static void
radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32));
radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4,
- S_0287A0_EPITCH(att->attachment->image->surface.u.gfx9.surf.epitch));
+ cb->cb_mrt_epitch);
} else {
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
radeon_emit(cmd_buffer->cs, cb->cb_color_base);
radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base);
}
}
+
+ if (radv_image_has_dcc(image)) {
+ /* Drawing with DCC enabled also compresses colorbuffers. */
+ radv_update_dcc_metadata(cmd_buffer, image, true);
+ }
}
static void
radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
struct radv_ds_buffer_info *ds,
struct radv_image *image, VkImageLayout layout,
- bool requires_cond_write)
+ bool requires_cond_exec)
{
uint32_t db_z_info = ds->db_z_info;
uint32_t db_z_info_reg;
}
/* When we don't know the last fast clear value we need to emit a
- * conditional packet, otherwise we can update DB_Z_INFO directly.
+ * conditional packet that may skip the following
+ * SET_CONTEXT_REG packet.
*/
- if (requires_cond_write) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
-
- const uint32_t write_space = 0 << 8; /* register */
- const uint32_t poll_space = 1 << 4; /* memory */
- const uint32_t function = 3 << 0; /* equal to the reference */
- const uint32_t options = write_space | poll_space | function;
- radeon_emit(cmd_buffer->cs, options);
-
- /* poll address - location of the depth clear value */
+ if (requires_cond_exec) {
uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->clear_value_offset;
-
- /* In presence of stencil format, we have to adjust the base
- * address because the first value is the stencil clear value.
- */
- if (vk_format_is_stencil(image->vk_format))
- va += 4;
+ va += image->offset + image->tc_compat_zrange_offset;
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
-
- radeon_emit(cmd_buffer->cs, fui(0.0f)); /* reference value */
- radeon_emit(cmd_buffer->cs, (uint32_t)-1); /* comparison mask */
- radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */
- radeon_emit(cmd_buffer->cs, 0u); /* write address high */
- radeon_emit(cmd_buffer->cs, db_z_info);
- } else {
- radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
}
+
+ radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
}
static void
if (!framebuffer || !subpass)
return;
- att_idx = subpass->depth_stencil_attachment.attachment;
- if (att_idx == VK_ATTACHMENT_UNUSED)
+ if (!subpass->depth_stencil_attachment)
return;
+ att_idx = subpass->depth_stencil_attachment->attachment;
att = &framebuffer->attachments[att_idx];
if (att->attachment->image != image)
return;
*/
if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
ds_clear_value.depth == 0.0) {
- VkImageLayout layout = subpass->depth_stencil_attachment.layout;
+ VkImageLayout layout = subpass->depth_stencil_attachment->layout;
radv_update_zrange_precision(cmd_buffer, &att->ds, image,
layout, false);
}
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
/**
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
++reg_count;
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, cmd_buffer->state.predicating));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, va);
radeon_emit(cs, fui(ds_clear_value.depth));
}
+/**
+ * Update the TC-compat metadata value for this image.
+ */
+static void
+radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ uint32_t value)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->tc_compat_zrange_offset;
+
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, value);
+}
+
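+/**
+ * Update the TC-compat zrange metadata based on the last fast depth
+ * clear value; it drives the COND_EXEC emitted by
+ * radv_update_zrange_precision().
+ */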
+static void
+radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ VkClearDepthStencilValue ds_clear_value)
+{
+ uint32_t cond_val;
+
+ /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
+ * depth clear value is 0.0f.
+ */
+ cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
+
+ radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val);
+}
+
/**
* Update the clear depth/stencil values for this image.
*/
radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects);
+ if (radv_image_is_tc_compat_htile(image) &&
+ (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
+ radv_update_tc_compat_zrange_metadata(cmd_buffer, image,
+ ds_clear_value);
+ }
+
radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value,
aspects);
}
uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
+ if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) {
radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
- radeon_emit(cs, (reg >> 2) - CONTEXT_SPACE_START);
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
radeon_emit(cs, reg_count);
} else {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
* cmask eliminate is required.
*/
void
-radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- bool value)
+radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image, bool value)
+{
+ uint64_t pred_val = value;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->fce_pred_offset;
+
+ assert(radv_image_has_dcc(image));
+
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
+ radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+ radeon_emit(cmd_buffer->cs, pred_val);
+ radeon_emit(cmd_buffer->cs, pred_val >> 32);
+}
+
+/**
+ * Update the DCC predicate to reflect the compression state.
+ */
+void
+radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image, bool value)
{
uint64_t pred_val = value;
uint64_t va = radv_buffer_get_va(image->bo);
assert(radv_image_has_dcc(image));
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cmd_buffer->cs, va);
radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2);
radeon_emit(cs, color_values[0]);
radeon_emit(cs, color_values[1]);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
/**
assert(radv_image_has_cmask(image) || radv_image_has_dcc(image));
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, cmd_buffer->state.predicating));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, va);
uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
+ if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) {
radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
- radeon_emit(cs, (reg >> 2) - CONTEXT_SPACE_START);
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
radeon_emit(cs, 2);
} else {
- /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG) |
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo);
- assert(att->attachment->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(att->attachment->aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT |
+ VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT));
radv_emit_fb_color_state(cmd_buffer, i, att, image, layout);
radv_load_color_clear_metadata(cmd_buffer, image, i);
- if (image->surface.bpe >= 8)
+ if (image->planes[0].surface.bpe >= 8)
num_bpp64_colorbufs++;
}
- if(subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
- int idx = subpass->depth_stencil_attachment.attachment;
- VkImageLayout layout = subpass->depth_stencil_attachment.layout;
+ if (subpass->depth_stencil_attachment) {
+ int idx = subpass->depth_stencil_attachment->attachment;
+ VkImageLayout layout = subpass->depth_stencil_attachment->layout;
struct radv_attachment_info *att = &framebuffer->attachments[idx];
struct radv_image *image = att->attachment->image;
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo);
}
radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
{
struct radv_descriptor_state *descriptors_state =
radv_get_descriptors_state(cmd_buffer, bind_point);
- uint8_t ptr_size = HAVE_32BIT_POINTERS ? 1 : 2;
- uint32_t size = MAX_SETS * 4 * ptr_size;
+ uint32_t size = MAX_SETS * 4;
uint32_t offset;
void *ptr;
return;
for (unsigned i = 0; i < MAX_SETS; i++) {
- uint32_t *uptr = ((uint32_t *)ptr) + i * ptr_size;
+ uint32_t *uptr = ((uint32_t *)ptr) + i;
uint64_t set_va = 0;
struct radv_descriptor_set *set = descriptors_state->sets[i];
if (descriptors_state->valid & (1u << i))
set_va = set->va;
uptr[0] = set_va & 0xffffffff;
- if (ptr_size == 2)
- uptr[1] = set_va >> 32;
}
uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
radv_get_descriptors_state(cmd_buffer, bind_point);
struct radv_pipeline_layout *layout = pipeline->layout;
struct radv_shader_variant *shader, *prev_shader;
+ bool need_push_constants = false;
unsigned offset;
void *ptr;
uint64_t va;
(!layout->push_constant_size && !layout->dynamic_offset_count))
return;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
- 16 * layout->dynamic_offset_count,
- 256, &offset, &ptr))
- return;
+ radv_foreach_stage(stage, stages) {
+ if (!pipeline->shaders[stage])
+ continue;
- memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
- memcpy((char*)ptr + layout->push_constant_size,
- descriptors_state->dynamic_buffers,
- 16 * layout->dynamic_offset_count);
+ need_push_constants |= pipeline->shaders[stage]->info.info.loads_push_constants;
+ need_push_constants |= pipeline->shaders[stage]->info.info.loads_dynamic_offsets;
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += offset;
+ uint8_t base = pipeline->shaders[stage]->info.info.base_inline_push_consts;
+ uint8_t count = pipeline->shaders[stage]->info.info.num_inline_push_consts;
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+ radv_emit_inline_push_consts(cmd_buffer, pipeline, stage,
+ AC_UD_INLINE_PUSH_CONSTANTS,
+ count,
+ (uint32_t *)&cmd_buffer->push_constants[base * 4]);
+ }
- prev_shader = NULL;
- radv_foreach_stage(stage, stages) {
- shader = radv_get_shader(pipeline, stage);
+ if (need_push_constants) {
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
+ 16 * layout->dynamic_offset_count,
+ 256, &offset, &ptr))
+ return;
+
+ memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
+ memcpy((char*)ptr + layout->push_constant_size,
+ descriptors_state->dynamic_buffers,
+ 16 * layout->dynamic_offset_count);
+
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += offset;
+
+ MAYBE_UNUSED unsigned cdw_max =
+ radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+
+ prev_shader = NULL;
+ radv_foreach_stage(stage, stages) {
+ shader = radv_get_shader(pipeline, stage);
- /* Avoid redundantly emitting the address for merged stages. */
- if (shader && shader != prev_shader) {
- radv_emit_userdata_address(cmd_buffer, pipeline, stage,
- AC_UD_PUSH_CONSTANTS, va);
+ /* Avoid redundantly emitting the address for merged stages. */
+ if (shader && shader != prev_shader) {
+ radv_emit_userdata_address(cmd_buffer, pipeline, stage,
+ AC_UD_PUSH_CONSTANTS, va);
- prev_shader = shader;
+ prev_shader = shader;
+ }
}
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
cmd_buffer->push_constant_stages &= ~stages;
- assert(cmd_buffer->cs->cdw <= cdw_max);
}
static void
{
if ((pipeline_is_dirty ||
(cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
- cmd_buffer->state.pipeline->vertex_elements.count &&
+ cmd_buffer->state.pipeline->num_vertex_bindings &&
radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
struct radv_vertex_elements_info *velems = &cmd_buffer->state.pipeline->vertex_elements;
unsigned vb_offset;
void *vb_ptr;
uint32_t i = 0;
- uint32_t count = velems->count;
+ uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings;
uint64_t va;
/* allocate some descriptor state for vertex buffers */
for (i = 0; i < count; i++) {
uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
uint32_t offset;
- int vb = velems->binding[i];
- struct radv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
- uint32_t stride = cmd_buffer->state.pipeline->binding_stride[vb];
+ struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
+ uint32_t stride = cmd_buffer->state.pipeline->binding_stride[i];
+
+ if (!buffer)
+ continue;
va = radv_buffer_get_va(buffer->bo);
- offset = cmd_buffer->vertex_bindings[vb].offset + velems->offset[i];
+ offset = cmd_buffer->vertex_bindings[i].offset;
va += offset + buffer->offset;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1;
else
desc[2] = buffer->size - offset;
- desc[3] = velems->rsrc_word3[i];
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
}
+struct radv_draw_info {
+ /**
+ * Number of vertices.
+ */
+ uint32_t count;
+
+ /**
+ * Index of the first vertex.
+ */
+ int32_t vertex_offset;
+
+ /**
+ * First instance id.
+ */
+ uint32_t first_instance;
+
+ /**
+ * Number of instances.
+ */
+ uint32_t instance_count;
+
+ /**
+ * First index (indexed draws only).
+ */
+ uint32_t first_index;
+
+ /**
+ * Whether it's an indexed draw.
+ */
+ bool indexed;
+
+ /**
+ * Indirect draw parameters resource.
+ */
+ struct radv_buffer *indirect;
+ uint64_t indirect_offset;
+ uint32_t stride;
+
+ /**
+ * Draw count parameters resource.
+ */
+ struct radv_buffer *count_buffer;
+ uint64_t count_buffer_offset;
+
+ /**
+ * Stream output parameters resource.
+ */
+ struct radv_buffer *strmout_buffer;
+ uint64_t strmout_buffer_offset;
+};
+
static void
-radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, bool indexed_draw,
- bool instanced_draw, bool indirect_draw,
- uint32_t draw_vertex_count)
+radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_draw_info *draw_info)
{
struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
struct radv_cmd_state *state = &cmd_buffer->state;
/* Draw state. */
ia_multi_vgt_param =
- si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw,
- indirect_draw, draw_vertex_count);
+ si_get_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1,
+ draw_info->indirect,
+ draw_info->indirect ? 0 : draw_info->count);
if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
if (info->chip_class >= GFX9) {
/* Primitive restart. */
primitive_reset_en =
- indexed_draw && state->pipeline->graphics.prim_restart_enable;
+ draw_info->indexed && state->pipeline->graphics.prim_restart_enable;
if (primitive_reset_en != state->last_primitive_reset_en) {
state->last_primitive_reset_en = primitive_reset_en;
state->last_primitive_reset_index = primitive_reset_index;
}
}
+
+ if (draw_info->strmout_buffer) {
+ uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo);
+
+ va += draw_info->strmout_buffer->offset +
+ draw_info->strmout_buffer_offset;
+
+ radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
+ draw_info->stride);
+
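+ /* Load the stream output byte count into
+ * VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE; the hardware computes
+ * the vertex count from it and the stride set above.
+ */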
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+ COPY_DATA_DST_SEL(COPY_DATA_REG) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
+ radeon_emit(cs, 0); /* unused */
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo);
+ }
}
static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer,
range.baseArrayLayer = view->base_layer;
range.layerCount = cmd_buffer->state.framebuffer->layers;
+ if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) {
+ /* If the current subpass uses multiview, the driver might have
+ * performed a fast color/depth clear to the whole image
+ * (including all layers). To make sure the driver will
+ * decompress the image correctly (if needed), we have to
+ * account for the "real" number of layers. If the view mask is
+ * sparse, this will decompress more layers than needed.
+ */
+ range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
+ }
+
radv_handle_image_transition(cmd_buffer,
view->image,
cmd_buffer->state.attachments[idx].current_layout,
- att.layout, 0, 0, &range,
- cmd_buffer->state.attachments[idx].pending_clear_aspects);
+ att.layout, 0, 0, &range);
cmd_buffer->state.attachments[idx].current_layout = att.layout;
void
radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_subpass *subpass, bool transitions)
+ const struct radv_subpass *subpass)
{
- if (transitions) {
- radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
-
- for (unsigned i = 0; i < subpass->color_count; ++i) {
- if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
- radv_handle_subpass_image_transition(cmd_buffer,
- subpass->color_attachments[i]);
- }
-
- for (unsigned i = 0; i < subpass->input_count; ++i) {
- radv_handle_subpass_image_transition(cmd_buffer,
- subpass->input_attachments[i]);
- }
-
- if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
- radv_handle_subpass_image_transition(cmd_buffer,
- subpass->depth_stencil_attachment);
- }
- }
-
cmd_buffer->state.subpass = subpass;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
if (result != VK_SUCCESS)
return result;
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
}
if (unlikely(cmd_buffer->device->trace_bo)) {
void radv_CmdPushDescriptorSetWithTemplateKHR(
VkCommandBuffer commandBuffer,
- VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
VkPipelineLayout _layout,
uint32_t set,
const void* pData)
if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline)
return;
+ assert(!pipeline->ctx_cs.cdw);
+
cmd_buffer->state.emitted_compute_pipeline = pipeline;
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw);
assert(firstViewport < MAX_VIEWPORTS);
assert(total_count >= 1 && total_count <= MAX_VIEWPORTS);
+ if (!memcmp(state->dynamic.viewport.viewports + firstViewport,
+ pViewports, viewportCount * sizeof(*pViewports))) {
+ return;
+ }
+
memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports,
viewportCount * sizeof(*pViewports));
assert(firstScissor < MAX_SCISSORS);
assert(total_count >= 1 && total_count <= MAX_SCISSORS);
+ if (!memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors,
+ scissorCount * sizeof(*pScissors))) {
+ return;
+ }
+
memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors,
scissorCount * sizeof(*pScissors));
float lineWidth)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ if (cmd_buffer->state.dynamic.line_width == lineWidth)
+ return;
+
cmd_buffer->state.dynamic.line_width = lineWidth;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
}
float depthBiasSlopeFactor)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ if (state->dynamic.depth_bias.bias == depthBiasConstantFactor &&
+ state->dynamic.depth_bias.clamp == depthBiasClamp &&
+ state->dynamic.depth_bias.slope == depthBiasSlopeFactor) {
+ return;
+ }
- cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor;
- cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp;
- cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor;
+ state->dynamic.depth_bias.bias = depthBiasConstantFactor;
+ state->dynamic.depth_bias.clamp = depthBiasClamp;
+ state->dynamic.depth_bias.slope = depthBiasSlopeFactor;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
}
void radv_CmdSetBlendConstants(
const float blendConstants[4])
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- memcpy(cmd_buffer->state.dynamic.blend_constants,
- blendConstants, sizeof(float) * 4);
+ if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4))
+ return;
+
+ memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4);
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
}
void radv_CmdSetDepthBounds(
float maxDepthBounds)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ if (state->dynamic.depth_bounds.min == minDepthBounds &&
+ state->dynamic.depth_bounds.max == maxDepthBounds) {
+ return;
+ }
- cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds;
- cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds;
+ state->dynamic.depth_bounds.min = minDepthBounds;
+ state->dynamic.depth_bounds.max = maxDepthBounds;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
}
void radv_CmdSetStencilCompareMask(
uint32_t compareMask)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_compare_mask.front == compareMask;
+ bool back_same = state->dynamic.stencil_compare_mask.back == compareMask;
+
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
- cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask;
+ state->dynamic.stencil_compare_mask.front = compareMask;
if (faceMask & VK_STENCIL_FACE_BACK_BIT)
- cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask;
+ state->dynamic.stencil_compare_mask.back = compareMask;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
}
void radv_CmdSetStencilWriteMask(
uint32_t writeMask)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_write_mask.front == writeMask;
+ bool back_same = state->dynamic.stencil_write_mask.back == writeMask;
+
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
- cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask;
+ state->dynamic.stencil_write_mask.front = writeMask;
if (faceMask & VK_STENCIL_FACE_BACK_BIT)
- cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask;
+ state->dynamic.stencil_write_mask.back = writeMask;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
}
void radv_CmdSetStencilReference(
uint32_t reference)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_reference.front == reference;
+ bool back_same = state->dynamic.stencil_reference.back == reference;
+
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
cmd_buffer->state.dynamic.stencil_reference.front = reference;
assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES);
assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES);
+ if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle,
+ pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) {
+ return;
+ }
+
typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle],
pDiscardRectangles, discardRectangleCount);
void radv_TrimCommandPool(
VkDevice device,
VkCommandPool commandPool,
- VkCommandPoolTrimFlagsKHR flags)
+ VkCommandPoolTrimFlags flags)
{
RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
}
}
+static uint32_t
+radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t subpass_id = state->subpass - state->pass->subpasses;
+
+ /* The id of this subpass shouldn't exceed the number of subpasses in
+ * this render pass minus 1.
+ */
+ assert(subpass_id < state->pass->subpass_count);
+ return subpass_id;
+}
+
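+/**
+ * Begin the given subpass: emit the start barrier, transition the
+ * subpass attachments and clear them if requested.
+ */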
+static void
+radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer,
+ uint32_t subpass_id)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radv_subpass *subpass = &state->pass->subpasses[subpass_id];
+
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, 4096);
+
+ radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
+
+ for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+ const uint32_t a = subpass->attachments[i].attachment;
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ radv_handle_subpass_image_transition(cmd_buffer,
+ subpass->attachments[i]);
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+ radv_cmd_buffer_clear_subpass(cmd_buffer);
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+}
+
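+/**
+ * End the current subpass: resolve attachments and transition the
+ * attachments used for the last time to their final layout.
+ */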
+static void
+radv_cmd_buffer_end_subpass(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ const struct radv_subpass *subpass = state->subpass;
+ uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
+
+ radv_cmd_buffer_resolve_subpass(cmd_buffer);
+
+ for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+ const uint32_t a = subpass->attachments[i].attachment;
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ if (state->pass->attachments[a].last_subpass_idx != subpass_id)
+ continue;
+
+ VkImageLayout layout = state->pass->attachments[a].final_layout;
+ radv_handle_subpass_image_transition(cmd_buffer,
+ (struct radv_subpass_attachment){a, layout});
+ }
+}
+
void radv_CmdBeginRenderPass(
VkCommandBuffer commandBuffer,
const VkRenderPassBeginInfo* pRenderPassBegin,
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass);
RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 2048);
- MAYBE_UNUSED VkResult result;
+ VkResult result;
cmd_buffer->state.framebuffer = framebuffer;
cmd_buffer->state.pass = pass;
if (result != VK_SUCCESS)
return;
- radv_cmd_buffer_set_subpass(cmd_buffer, pass->subpasses, true);
- assert(cmd_buffer->cs->cdw <= cdw_max);
-
- radv_cmd_buffer_clear_subpass(cmd_buffer);
+ radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
}
void radv_CmdBeginRenderPass2KHR(
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_cmd_buffer_resolve_subpass(cmd_buffer);
-
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
- 2048);
-
- radv_cmd_buffer_set_subpass(cmd_buffer, cmd_buffer->state.subpass + 1, true);
- radv_cmd_buffer_clear_subpass(cmd_buffer);
+ uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer);
+ radv_cmd_buffer_end_subpass(cmd_buffer);
+ radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
}
void radv_CmdNextSubpass2KHR(
}
}
-struct radv_draw_info {
- /**
- * Number of vertices.
- */
- uint32_t count;
-
- /**
- * Index of the first vertex.
- */
- int32_t vertex_offset;
-
- /**
- * First instance id.
- */
- uint32_t first_instance;
-
- /**
- * Number of instances.
- */
- uint32_t instance_count;
-
- /**
- * First index (indexed draws only).
- */
- uint32_t first_index;
-
- /**
- * Whether it's an indexed draw.
- */
- bool indexed;
-
- /**
- * Indirect draw parameters resource.
- */
- struct radv_buffer *indirect;
- uint64_t indirect_offset;
- uint32_t stride;
-
- /**
- * Draw count parameters resource.
- */
- struct radv_buffer *count_buffer;
- uint64_t count_buffer_offset;
-
- /**
- * Stream output parameters resource.
- */
- struct radv_buffer *strmout_buffer;
- uint64_t strmout_buffer_offset;
-};
-
static void
radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer,
const struct radv_draw_info *info)
struct radeon_winsys *ws = cmd_buffer->device->ws;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (info->strmout_buffer) {
- uint64_t va = radv_buffer_get_va(info->strmout_buffer->bo);
-
- va += info->strmout_buffer->offset +
- info->strmout_buffer_offset;
-
- radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
- info->stride);
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
- radeon_emit(cs, 0); /* unused */
-
- radv_cs_add_buffer(ws, cs, info->strmout_buffer->bo);
- }
-
if (info->indirect) {
uint64_t va = radv_buffer_get_va(info->indirect->bo);
uint64_t count_va = 0;
* any context registers.
*/
static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
- bool indexed_draw)
+ const struct radv_draw_info *info)
{
struct radv_cmd_state *state = &cmd_buffer->state;
if (!cmd_buffer->device->physical_device->has_scissor_bug)
return false;
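+ /* Either a context roll was already recorded, or the strmout draw
+ * will write VGT_STRMOUT_DRAW_OPAQUE_* context registers right
+ * before the draw.
+ */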
+ if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer)
+ return true;
+
uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
/* Index, vertex and streamout buffers don't change context regs, and
- * pipeline is handled later.
+ * pipeline is already handled.
*/
used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER |
RADV_CMD_DIRTY_VERTEX_BUFFER |
RADV_CMD_DIRTY_STREAMOUT_BUFFER |
RADV_CMD_DIRTY_PIPELINE);
- /* Assume all state changes except these two can imply context rolls. */
if (cmd_buffer->state.dirty & used_states)
return true;
- if (cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
- return true;
-
- if (indexed_draw && state->pipeline->graphics.prim_restart_enable &&
+ if (info->indexed && state->pipeline->graphics.prim_restart_enable &&
(state->index_type ? 0xffffffffu : 0xffffu) != state->last_primitive_reset_index)
return true;
radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
const struct radv_draw_info *info)
{
- bool late_scissor_emission = radv_need_late_scissor_emission(cmd_buffer, info->indexed);
+ bool late_scissor_emission;
if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
radv_emit_graphics_pipeline(cmd_buffer);
+ /* This should be done before cmd_buffer->state.dirty is cleared
+ * (excluding RADV_CMD_DIRTY_PIPELINE) and after
+ * cmd_buffer->state.context_roll_without_scissor_emitted is set. */
+ late_scissor_emission =
+ radv_need_late_scissor_emission(cmd_buffer, info);
+
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
radv_emit_framebuffer_state(cmd_buffer);
radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
- radv_emit_draw_registers(cmd_buffer, info->indexed,
- info->instance_count > 1, info->indirect,
- info->indirect ? 0 : info->count);
+ radv_emit_draw_registers(cmd_buffer, info);
if (late_scissor_emission)
radv_emit_scissor(cmd_buffer);
radeon_check_space(cmd_buffer->device->ws,
cmd_buffer->cs, 4096);
+ if (likely(!info->indirect)) {
+ /* SI-CI treat instance_count==0 as instance_count==1. There is
+ * no workaround for indirect draws, but we can at least skip
+ * direct draws.
+ */
+ if (unlikely(!info->instance_count))
+ return;
+
+ /* Handle count == 0. */
+ if (unlikely(!info->count && !info->strmout_buffer))
+ return;
+ }
+
/* Use optimal packet order based on whether we need to sync the
* pipeline.
*/
}
if (loc->sgpr_idx != -1) {
- assert(!loc->indirect);
assert(loc->num_sgprs == 3);
radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier);
- radv_cmd_buffer_resolve_subpass(cmd_buffer);
-
- for (unsigned i = 0; i < cmd_buffer->state.framebuffer->attachment_count; ++i) {
- VkImageLayout layout = cmd_buffer->state.pass->attachments[i].final_layout;
- radv_handle_subpass_image_transition(cmd_buffer,
- (struct radv_subpass_attachment){i, layout});
- }
+ radv_cmd_buffer_end_subpass(cmd_buffer);
vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
assert(range->baseMipLevel == 0);
assert(range->levelCount == 1 || range->levelCount == VK_REMAINING_MIP_LEVELS);
unsigned layer_count = radv_get_layerCount(image, range);
- uint64_t size = image->surface.htile_slice_size * layer_count;
+ uint64_t size = image->planes[0].surface.htile_slice_size * layer_count;
VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
uint64_t offset = image->offset + image->htile_offset +
- image->surface.htile_slice_size * range->baseArrayLayer;
+ image->planes[0].surface.htile_slice_size * range->baseArrayLayer;
struct radv_cmd_state *state = &cmd_buffer->state;
VkClearDepthStencilValue value = {};
aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);
+
+ if (radv_image_is_tc_compat_htile(image)) {
+ /* Initialize the TC-compat metadata value to 0 because by
+ * default DB_Z_INFO.ZRANGE_PRECISION is set to 1, and we only
+ * need to conditionally update its value when performing a
+ * fast depth clear.
+ */
+ radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0);
+ }
}
static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
VkImageLayout dst_layout,
unsigned src_queue_mask,
unsigned dst_queue_mask,
- const VkImageSubresourceRange *range,
- VkImageAspectFlags pending_clears)
+ const VkImageSubresourceRange *range)
{
if (!radv_image_has_htile(image))
return;
- if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
- radv_layout_has_htile(image, dst_layout, dst_queue_mask)) {
- /* TODO: merge with the clear if applicable */
- radv_initialize_htile(cmd_buffer, image, range, 0);
+ if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
+ uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
+
+ if (radv_layout_is_htile_compressed(image, dst_layout,
+ dst_queue_mask)) {
+ clear_value = 0;
+ }
+
+ radv_initialize_htile(cmd_buffer, image, range, clear_value);
} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
}
+void radv_initialize_fmask(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
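+ /* Default FMASK clear values for 1, 2, 4 and 8 samples, indexed
+ * by log2(samples).
+ */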
+ static const uint32_t fmask_clear_values[4] = {
+ 0x00000000,
+ 0x02020202,
+ 0xE4E4E4E4,
+ 0x76543210
+ };
+ uint32_t log2_samples = util_logbase2(image->info.samples);
+ uint32_t value = fmask_clear_values[log2_samples];
+
+ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+
+ state->flush_bits |= radv_clear_fmask(cmd_buffer, image, value);
+
+ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+}
+
void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image, uint32_t value)
{
radv_initialise_cmask(cmd_buffer, image, value);
}
+ if (radv_image_has_fmask(image)) {
+ radv_initialize_fmask(cmd_buffer, image);
+ }
+
if (radv_image_has_dcc(image)) {
uint32_t value = 0xffffffffu; /* Fully expanded mode. */
bool need_decompress_pass = false;
radv_initialize_dcc(cmd_buffer, image, value);
- radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image,
- need_decompress_pass);
+ radv_update_fce_metadata(cmd_buffer, image,
+ need_decompress_pass);
}
if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) {
!radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) {
radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
}
+
+ if (radv_image_has_fmask(image)) {
+ if (src_layout != VK_IMAGE_LAYOUT_GENERAL &&
+ dst_layout == VK_IMAGE_LAYOUT_GENERAL) {
+ radv_expand_fmask_image_inplace(cmd_buffer, image, range);
+ }
+ }
}
}
VkImageLayout dst_layout,
uint32_t src_family,
uint32_t dst_family,
- const VkImageSubresourceRange *range,
- VkImageAspectFlags pending_clears)
+ const VkImageSubresourceRange *range)
{
if (image->exclusive && src_family != dst_family) {
/* This is an acquire or a release operation and there will be
return;
}
+ if (src_layout == dst_layout)
+ return;
+
unsigned src_queue_mask =
radv_image_queue_family_mask(image, src_family,
cmd_buffer->queue_family_index);
radv_handle_depth_image_transition(cmd_buffer, image,
src_layout, dst_layout,
src_queue_mask, dst_queue_mask,
- range, pending_clears);
+ range);
} else {
radv_handle_color_image_transition(cmd_buffer, image,
src_layout, dst_layout,
uint32_t eventCount;
const VkEvent *pEvents;
VkPipelineStageFlags srcStageMask;
+ VkPipelineStageFlags dstStageMask;
};
static void
image);
}
- radv_stage_flush(cmd_buffer, info->srcStageMask);
+ /* The Vulkan spec 1.1.98 says:
+ *
+ * "An execution dependency with only
+ * VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask
+ * will only prevent that stage from executing in subsequently
+ * submitted commands. As this stage does not perform any actual
+ * execution, this is not observable - in effect, it does not delay
+ * processing of subsequent commands. Similarly an execution dependency
+ * with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask
+ * will effectively not wait for any prior commands to complete."
+ */
+ if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+ radv_stage_flush(cmd_buffer, info->srcStageMask);
cmd_buffer->state.flush_bits |= src_flush_bits;
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
pImageMemoryBarriers[i].newLayout,
pImageMemoryBarriers[i].srcQueueFamilyIndex,
pImageMemoryBarriers[i].dstQueueFamilyIndex,
- &pImageMemoryBarriers[i].subresourceRange,
- 0);
+ &pImageMemoryBarriers[i].subresourceRange);
}
/* Make sure CP DMA is idle because the driver might have performed a
* DMA operation for copying or filling buffers/images.
*/
- si_cp_dma_wait_for_idle(cmd_buffer);
+ if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT |
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
+ si_cp_dma_wait_for_idle(cmd_buffer);
cmd_buffer->state.flush_bits |= dst_flush_bits;
}
info.eventCount = 0;
info.pEvents = NULL;
info.srcStageMask = srcStageMask;
+ info.dstStageMask = destStageMask;
radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
bufferMemoryBarrierCount, pBufferMemoryBarriers,
/* Make sure CP DMA is idle because the driver might have performed a
* DMA operation for copying or filling buffers/images.
*/
- si_cp_dma_wait_for_idle(cmd_buffer);
+ if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT |
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
+ si_cp_dma_wait_for_idle(cmd_buffer);
/* TODO: Emit EOS events for syncing PS/CS stages. */
if (!(stageMask & ~top_of_pipe_flags)) {
/* Just need to sync the PFP engine. */
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, va);
} else if (!(stageMask & ~post_index_fetch_flags)) {
/* Sync ME because PFP reads index and indirect buffers. */
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, va);
cmd_buffer->device->physical_device->rad_info.chip_class,
radv_cmd_buffer_uses_mec(cmd_buffer),
V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DATA_SEL_VALUE_32BIT, va, 2, value,
+ EOP_DATA_SEL_VALUE_32BIT, va, value,
cmd_buffer->gfx9_eop_bug_va);
}
draw_visible = false;
}
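+ /* Flush pending cache operations so the value read for
+ * predication is coherent (assumed rationale for flushing here).
+ */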
+ si_emit_cache_flush(cmd_buffer);
+
/* Enable predication for this command buffer. */
si_emit_set_predication_state(cmd_buffer, draw_visible, va);
cmd_buffer->state.predicating = true;
enabled_mask |= 1 << idx;
}
- cmd_buffer->state.streamout.enabled_mask = enabled_mask;
+ cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
}
S_028B94_STREAMOUT_3_EN(so->streamout_enabled));
radeon_emit(cs, so->hw_enabled_mask &
so->enabled_stream_buffers_mask);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
for_each_bit(i, so->enabled_mask) {
int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx > counterBufferCount)
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
counter_buffer_idx = -1;
/* SI binds streamout buffers as shader resources.
radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+
if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
/* The array of counter buffers is optional. */
RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
for_each_bit(i, so->enabled_mask) {
int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx > counterBufferCount)
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
counter_buffer_idx = -1;
if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
* that the primitives-emitted query won't increment.
*/
radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
radv_set_streamout_enable(cmd_buffer, false);