cmd_buffer->descriptors[i].push_dirty = false;
}
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+ cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends;
- unsigned eop_bug_offset;
+ unsigned fence_offset, eop_bug_offset;
void *fence_ptr;
- radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
- &cmd_buffer->gfx9_fence_offset,
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8, &fence_offset,
&fence_ptr);
- cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
+
+ cmd_buffer->gfx9_fence_va =
+ radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ cmd_buffer->gfx9_fence_va += fence_offset;
/* Allocate a buffer for the EOP bug on GFX9. */
- radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8,
&eop_bug_offset, &fence_ptr);
cmd_buffer->gfx9_eop_bug_va =
radv_buffer_get_va(cmd_buffer->upload.upload_bo);
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS|
RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_32BIT);
+ RADEON_FLAG_32BIT,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER);
if (!bo) {
cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
unsigned *out_offset,
void **ptr)
{
+ assert(util_is_power_of_two_nonzero(alignment));
+
uint64_t offset = align(cmd_buffer->upload.offset, alignment);
if (offset + size > cmd_buffer->upload.size) {
if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, va);
enum radv_cmd_flush_bits flags)
{
if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
- uint32_t *ptr = NULL;
- uint64_t va = 0;
-
assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH));
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- va = radv_buffer_get_va(cmd_buffer->gfx9_fence_bo) +
- cmd_buffer->gfx9_fence_offset;
- ptr = &cmd_buffer->gfx9_fence_idx;
- }
-
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
/* Force wait for graphics or compute engines to be idle. */
si_cs_emit_cache_flush(cmd_buffer->cs,
cmd_buffer->device->physical_device->rad_info.chip_class,
- ptr, va,
+ &cmd_buffer->gfx9_fence_idx,
+ cmd_buffer->gfx9_fence_va,
radv_cmd_buffer_uses_mec(cmd_buffer),
flags, cmd_buffer->gfx9_eop_bug_va);
}
return;
assert(loc->num_sgprs == 1);
- assert(!loc->indirect);
radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
base_reg + loc->sgpr_idx * 4, va, false);
}
}
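+/* Emit push constant values that the compiler promoted to user SGPRs
+ * (inline push constants), bypassing the upload buffer entirely.
+ */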
+static void
+radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline,
+ gl_shader_stage stage,
+ int idx, int count, uint32_t *values)
+{
+ struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
+ uint32_t base_reg = pipeline->user_data_0[stage];
+ if (loc->sgpr_idx == -1)
+ return;
+
+ assert(loc->num_sgprs == count);
+
+ radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count);
+ radeon_emit_array(cmd_buffer->cs, values, count);
+}
+
static void
radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
}
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
}
+ /* TODO: avoid redundantly setting context registers */
radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
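+ /* Only re-emit the context registers of the pipeline when they differ
+  * from what was last emitted, to avoid needless context rolls.
+  */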
+ if (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw ||
+ cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash ||
+ memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf,
+ pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw * 4)) {
+ radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw);
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ }
+
for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
if (!pipeline->shaders[i])
continue;
cmd_buffer->state.dynamic.scissor.scissors,
cmd_buffer->state.dynamic.viewport.viewports,
cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = false;
}
static void
if (!framebuffer || !subpass)
return;
- att_idx = subpass->depth_stencil_attachment.attachment;
- if (att_idx == VK_ATTACHMENT_UNUSED)
+ if (!subpass->depth_stencil_attachment)
return;
+ att_idx = subpass->depth_stencil_attachment->attachment;
att = &framebuffer->attachments[att_idx];
if (att->attachment->image != image)
return;
*/
if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
ds_clear_value.depth == 0.0) {
- VkImageLayout layout = subpass->depth_stencil_attachment.layout;
+ VkImageLayout layout = subpass->depth_stencil_attachment->layout;
radv_update_zrange_precision(cmd_buffer, &att->ds, image,
layout, false);
}
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
/**
++reg_count;
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, va);
va += image->offset + image->tc_compat_zrange_offset;
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, va);
struct radv_image *image,
VkClearDepthStencilValue ds_clear_value)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->tc_compat_zrange_offset;
uint32_t cond_val;
uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
+ if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) {
radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
assert(radv_image_has_dcc(image));
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cmd_buffer->cs, va);
assert(radv_image_has_dcc(image));
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cmd_buffer->cs, va);
radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2);
radeon_emit(cs, color_values[0]);
radeon_emit(cs, color_values[1]);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
/**
assert(radv_image_has_cmask(image) || radv_image_has_dcc(image));
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, va);
uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
+ if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) {
radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
radeon_emit(cs, 2);
} else {
- /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG) |
num_bpp64_colorbufs++;
}
- if(subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
- int idx = subpass->depth_stencil_attachment.attachment;
- VkImageLayout layout = subpass->depth_stencil_attachment.layout;
+ if (subpass->depth_stencil_attachment) {
+ int idx = subpass->depth_stencil_attachment->attachment;
+ VkImageLayout layout = subpass->depth_stencil_attachment->layout;
struct radv_attachment_info *att = &framebuffer->attachments[idx];
struct radv_image *image = att->attachment->image;
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo);
}
radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
radv_get_descriptors_state(cmd_buffer, bind_point);
struct radv_pipeline_layout *layout = pipeline->layout;
struct radv_shader_variant *shader, *prev_shader;
+ bool need_push_constants = false;
unsigned offset;
void *ptr;
uint64_t va;
(!layout->push_constant_size && !layout->dynamic_offset_count))
return;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
- 16 * layout->dynamic_offset_count,
- 256, &offset, &ptr))
- return;
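+ /* Emit the inline push constants for each stage and check whether any
+  * stage still needs the push constants uploaded to memory.
+  */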
+ radv_foreach_stage(stage, stages) {
+ if (!pipeline->shaders[stage])
+ continue;
- memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
- memcpy((char*)ptr + layout->push_constant_size,
- descriptors_state->dynamic_buffers,
- 16 * layout->dynamic_offset_count);
+ need_push_constants |= pipeline->shaders[stage]->info.info.loads_push_constants;
+ need_push_constants |= pipeline->shaders[stage]->info.info.loads_dynamic_offsets;
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += offset;
+ uint8_t base = pipeline->shaders[stage]->info.info.base_inline_push_consts;
+ uint8_t count = pipeline->shaders[stage]->info.info.num_inline_push_consts;
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+ radv_emit_inline_push_consts(cmd_buffer, pipeline, stage,
+ AC_UD_INLINE_PUSH_CONSTANTS,
+ count,
+ (uint32_t *)&cmd_buffer->push_constants[base * 4]);
+ }
- prev_shader = NULL;
- radv_foreach_stage(stage, stages) {
- shader = radv_get_shader(pipeline, stage);
+ if (need_push_constants) {
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
+ 16 * layout->dynamic_offset_count,
+ 256, &offset, &ptr))
+ return;
- /* Avoid redundantly emitting the address for merged stages. */
- if (shader && shader != prev_shader) {
- radv_emit_userdata_address(cmd_buffer, pipeline, stage,
- AC_UD_PUSH_CONSTANTS, va);
+ memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
+ memcpy((char*)ptr + layout->push_constant_size,
+ descriptors_state->dynamic_buffers,
+ 16 * layout->dynamic_offset_count);
- prev_shader = shader;
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += offset;
+
+ MAYBE_UNUSED unsigned cdw_max =
+ radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+
+ prev_shader = NULL;
+ radv_foreach_stage(stage, stages) {
+ shader = radv_get_shader(pipeline, stage);
+
+ /* Avoid redundantly emitting the address for merged stages. */
+ if (shader && shader != prev_shader) {
+ radv_emit_userdata_address(cmd_buffer, pipeline, stage,
+ AC_UD_PUSH_CONSTANTS, va);
+
+ prev_shader = shader;
+ }
}
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
cmd_buffer->push_constant_stages &= ~stages;
- assert(cmd_buffer->cs->cdw <= cdw_max);
}
static void
radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
}
+struct radv_draw_info {
+ /**
+ * Number of vertices.
+ */
+ uint32_t count;
+
+ /**
+ * Index of the first vertex.
+ */
+ int32_t vertex_offset;
+
+ /**
+ * First instance id.
+ */
+ uint32_t first_instance;
+
+ /**
+ * Number of instances.
+ */
+ uint32_t instance_count;
+
+ /**
+ * First index (indexed draws only).
+ */
+ uint32_t first_index;
+
+ /**
+ * Whether it's an indexed draw.
+ */
+ bool indexed;
+
+ /**
+ * Indirect draw parameters resource.
+ */
+ struct radv_buffer *indirect;
+ uint64_t indirect_offset;
+ uint32_t stride;
+
+ /**
+ * Draw count parameters resource.
+ */
+ struct radv_buffer *count_buffer;
+ uint64_t count_buffer_offset;
+
+ /**
+ * Stream output parameters resource.
+ */
+ struct radv_buffer *strmout_buffer;
+ uint64_t strmout_buffer_offset;
+};
+
static void
-radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, bool indexed_draw,
- bool instanced_draw, bool indirect_draw,
- uint32_t draw_vertex_count)
+radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_draw_info *draw_info)
{
struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
struct radv_cmd_state *state = &cmd_buffer->state;
/* Draw state. */
ia_multi_vgt_param =
- si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw,
- indirect_draw, draw_vertex_count);
+ si_get_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1,
+ draw_info->indirect,
+ draw_info->indirect ? 0 : draw_info->count);
if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
if (info->chip_class >= GFX9) {
/* Primitive restart. */
primitive_reset_en =
- indexed_draw && state->pipeline->graphics.prim_restart_enable;
+ draw_info->indexed && state->pipeline->graphics.prim_restart_enable;
if (primitive_reset_en != state->last_primitive_reset_en) {
state->last_primitive_reset_en = primitive_reset_en;
state->last_primitive_reset_index = primitive_reset_index;
}
}
+
+ if (draw_info->strmout_buffer) {
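+ /* Draw from a transform feedback counter: copy the number of bytes
+  * written into the DRAW_OPAQUE registers so the VGT can compute the
+  * vertex count from the buffer-filled size and the vertex stride.
+  */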
+ uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo);
+
+ va += draw_info->strmout_buffer->offset +
+ draw_info->strmout_buffer_offset;
+
+ radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
+ draw_info->stride);
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+ COPY_DATA_DST_SEL(COPY_DATA_REG) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
+ radeon_emit(cs, 0); /* unused */
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo);
+ }
}
static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer,
range.baseArrayLayer = view->base_layer;
range.layerCount = cmd_buffer->state.framebuffer->layers;
+ if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) {
+ /* If the current subpass uses multiview, the driver might have
+ * performed a fast color/depth clear to the whole image
+ * (including all layers). To make sure the driver will
+ * decompress the image correctly (if needed), we have to
+ * account for the "real" number of layers. If the view mask is
+ * sparse, this will decompress more layers than needed.
+ */
+ range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
+ }
+
radv_handle_image_transition(cmd_buffer,
view->image,
cmd_buffer->state.attachments[idx].current_layout,
void
radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_subpass *subpass, bool transitions)
+ const struct radv_subpass *subpass)
{
- if (transitions) {
- radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
-
- for (unsigned i = 0; i < subpass->color_count; ++i) {
- if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
- radv_handle_subpass_image_transition(cmd_buffer,
- subpass->color_attachments[i]);
- }
-
- for (unsigned i = 0; i < subpass->input_count; ++i) {
- radv_handle_subpass_image_transition(cmd_buffer,
- subpass->input_attachments[i]);
- }
-
- if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
- radv_handle_subpass_image_transition(cmd_buffer,
- subpass->depth_stencil_attachment);
- }
- }
-
cmd_buffer->state.subpass = subpass;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
if (result != VK_SUCCESS)
return result;
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
}
if (unlikely(cmd_buffer->device->trace_bo)) {
void radv_CmdPushDescriptorSetWithTemplateKHR(
VkCommandBuffer commandBuffer,
- VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
VkPipelineLayout _layout,
uint32_t set,
const void* pData)
if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline)
return;
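+ /* Compute pipelines never set any context registers. */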
+ assert(!pipeline->ctx_cs.cdw);
+
cmd_buffer->state.emitted_compute_pipeline = pipeline;
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw);
assert(firstViewport < MAX_VIEWPORTS);
assert(total_count >= 1 && total_count <= MAX_VIEWPORTS);
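+ /* Skip redundant viewport updates; they would only trigger needless
+  * context rolls.
+  */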
+ if (!memcmp(state->dynamic.viewport.viewports + firstViewport,
+ pViewports, viewportCount * sizeof(*pViewports))) {
+ return;
+ }
+
memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports,
viewportCount * sizeof(*pViewports));
assert(firstScissor < MAX_SCISSORS);
assert(total_count >= 1 && total_count <= MAX_SCISSORS);
+ if (!memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors,
+ scissorCount * sizeof(*pScissors))) {
+ return;
+ }
+
memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors,
scissorCount * sizeof(*pScissors));
float lineWidth)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ if (cmd_buffer->state.dynamic.line_width == lineWidth)
+ return;
+
cmd_buffer->state.dynamic.line_width = lineWidth;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
}
float depthBiasSlopeFactor)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ if (state->dynamic.depth_bias.bias == depthBiasConstantFactor &&
+ state->dynamic.depth_bias.clamp == depthBiasClamp &&
+ state->dynamic.depth_bias.slope == depthBiasSlopeFactor) {
+ return;
+ }
- cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor;
- cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp;
- cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor;
+ state->dynamic.depth_bias.bias = depthBiasConstantFactor;
+ state->dynamic.depth_bias.clamp = depthBiasClamp;
+ state->dynamic.depth_bias.slope = depthBiasSlopeFactor;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
}
void radv_CmdSetBlendConstants(
const float blendConstants[4])
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4))
+ return;
- memcpy(cmd_buffer->state.dynamic.blend_constants,
- blendConstants, sizeof(float) * 4);
+ memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4);
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
}
void radv_CmdSetDepthBounds(
float maxDepthBounds)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ if (state->dynamic.depth_bounds.min == minDepthBounds &&
+ state->dynamic.depth_bounds.max == maxDepthBounds) {
+ return;
+ }
- cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds;
- cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds;
+ state->dynamic.depth_bounds.min = minDepthBounds;
+ state->dynamic.depth_bounds.max = maxDepthBounds;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
}
void radv_CmdSetStencilCompareMask(
uint32_t compareMask)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_compare_mask.front == compareMask;
+ bool back_same = state->dynamic.stencil_compare_mask.back == compareMask;
+
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
- cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask;
+ state->dynamic.stencil_compare_mask.front = compareMask;
if (faceMask & VK_STENCIL_FACE_BACK_BIT)
- cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask;
+ state->dynamic.stencil_compare_mask.back = compareMask;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
}
void radv_CmdSetStencilWriteMask(
uint32_t writeMask)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_write_mask.front == writeMask;
+ bool back_same = state->dynamic.stencil_write_mask.back == writeMask;
+
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
- cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask;
+ state->dynamic.stencil_write_mask.front = writeMask;
if (faceMask & VK_STENCIL_FACE_BACK_BIT)
- cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask;
+ state->dynamic.stencil_write_mask.back = writeMask;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
}
void radv_CmdSetStencilReference(
uint32_t reference)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_reference.front == reference;
+ bool back_same = state->dynamic.stencil_reference.back == reference;
+
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
cmd_buffer->state.dynamic.stencil_reference.front = reference;
assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES);
assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES);
+ if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle,
+ pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) {
+ return;
+ }
+
typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle],
pDiscardRectangles, discardRectangleCount);
void radv_TrimCommandPool(
VkDevice device,
VkCommandPool commandPool,
- VkCommandPoolTrimFlagsKHR flags)
+ VkCommandPoolTrimFlags flags)
{
RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
}
}
+static uint32_t
+radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t subpass_id = state->subpass - state->pass->subpasses;
+
+ /* The id of this subpass shouldn't exceed the number of subpasses in
+ * this render pass minus 1.
+ */
+ assert(subpass_id < state->pass->subpass_count);
+ return subpass_id;
+}
+
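+/* Begin the given subpass: perform the start barrier, transition the
+ * attachments to their subpass layouts and emit any subpass clears.
+ */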
+static void
+radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer,
+ uint32_t subpass_id)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radv_subpass *subpass = &state->pass->subpasses[subpass_id];
+
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, 4096);
+
+ radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
+
+ for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+ const uint32_t a = subpass->attachments[i].attachment;
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ radv_handle_subpass_image_transition(cmd_buffer,
+ subpass->attachments[i]);
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+ radv_cmd_buffer_clear_subpass(cmd_buffer);
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+}
+
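+/* End the current subpass: resolve attachments and transition attachments
+ * that are used for the last time to their final layouts.
+ */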
+static void
+radv_cmd_buffer_end_subpass(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ const struct radv_subpass *subpass = state->subpass;
+ uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
+
+ radv_cmd_buffer_resolve_subpass(cmd_buffer);
+
+ for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+ const uint32_t a = subpass->attachments[i].attachment;
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ if (state->pass->attachments[a].last_subpass_idx != subpass_id)
+ continue;
+
+ VkImageLayout layout = state->pass->attachments[a].final_layout;
+ radv_handle_subpass_image_transition(cmd_buffer,
+ (struct radv_subpass_attachment){a, layout});
+ }
+}
+
void radv_CmdBeginRenderPass(
VkCommandBuffer commandBuffer,
const VkRenderPassBeginInfo* pRenderPassBegin,
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass);
RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 2048);
- MAYBE_UNUSED VkResult result;
+ VkResult result;
cmd_buffer->state.framebuffer = framebuffer;
cmd_buffer->state.pass = pass;
if (result != VK_SUCCESS)
return;
- radv_cmd_buffer_set_subpass(cmd_buffer, pass->subpasses, true);
- assert(cmd_buffer->cs->cdw <= cdw_max);
-
- radv_cmd_buffer_clear_subpass(cmd_buffer);
+ radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
}
void radv_CmdBeginRenderPass2KHR(
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_cmd_buffer_resolve_subpass(cmd_buffer);
-
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
- 2048);
-
- radv_cmd_buffer_set_subpass(cmd_buffer, cmd_buffer->state.subpass + 1, true);
- radv_cmd_buffer_clear_subpass(cmd_buffer);
+ uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer);
+ radv_cmd_buffer_end_subpass(cmd_buffer);
+ radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
}
void radv_CmdNextSubpass2KHR(
}
}
-struct radv_draw_info {
- /**
- * Number of vertices.
- */
- uint32_t count;
-
- /**
- * Index of the first vertex.
- */
- int32_t vertex_offset;
-
- /**
- * First instance id.
- */
- uint32_t first_instance;
-
- /**
- * Number of instances.
- */
- uint32_t instance_count;
-
- /**
- * First index (indexed draws only).
- */
- uint32_t first_index;
-
- /**
- * Whether it's an indexed draw.
- */
- bool indexed;
-
- /**
- * Indirect draw parameters resource.
- */
- struct radv_buffer *indirect;
- uint64_t indirect_offset;
- uint32_t stride;
-
- /**
- * Draw count parameters resource.
- */
- struct radv_buffer *count_buffer;
- uint64_t count_buffer_offset;
-
- /**
- * Stream output parameters resource.
- */
- struct radv_buffer *strmout_buffer;
- uint64_t strmout_buffer_offset;
-};
-
static void
radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer,
const struct radv_draw_info *info)
struct radeon_winsys *ws = cmd_buffer->device->ws;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (info->strmout_buffer) {
- uint64_t va = radv_buffer_get_va(info->strmout_buffer->bo);
-
- va += info->strmout_buffer->offset +
- info->strmout_buffer_offset;
-
- radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
- info->stride);
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
- radeon_emit(cs, 0); /* unused */
-
- radv_cs_add_buffer(ws, cs, info->strmout_buffer->bo);
- }
-
if (info->indirect) {
uint64_t va = radv_buffer_get_va(info->indirect->bo);
uint64_t count_va = 0;
* any context registers.
*/
static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
- bool indexed_draw)
+ const struct radv_draw_info *info)
{
struct radv_cmd_state *state = &cmd_buffer->state;
if (!cmd_buffer->device->physical_device->has_scissor_bug)
return false;
+ if (cmd_buffer->state.context_roll_without_scissor_emitted ||
+     info->strmout_buffer)
+ return true;
+
uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
/* Index, vertex and streamout buffers don't change context regs, and
- * pipeline is handled later.
+ * pipeline is already handled.
*/
used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER |
RADV_CMD_DIRTY_VERTEX_BUFFER |
RADV_CMD_DIRTY_STREAMOUT_BUFFER |
RADV_CMD_DIRTY_PIPELINE);
- /* Assume all state changes except these two can imply context rolls. */
if (cmd_buffer->state.dirty & used_states)
return true;
- if (cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
- return true;
-
- if (indexed_draw && state->pipeline->graphics.prim_restart_enable &&
+ if (info->indexed && state->pipeline->graphics.prim_restart_enable &&
(state->index_type ? 0xffffffffu : 0xffffu) != state->last_primitive_reset_index)
return true;
radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
const struct radv_draw_info *info)
{
- bool late_scissor_emission = radv_need_late_scissor_emission(cmd_buffer, info->indexed);
+ bool late_scissor_emission;
if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
radv_emit_graphics_pipeline(cmd_buffer);
+ /* This must be done before cmd_buffer->state.dirty is cleared
+ * (excluding RADV_CMD_DIRTY_PIPELINE) and after
+ * cmd_buffer->state.context_roll_without_scissor_emitted is set. */
+ late_scissor_emission =
+ radv_need_late_scissor_emission(cmd_buffer, info);
+
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
radv_emit_framebuffer_state(cmd_buffer);
radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
- radv_emit_draw_registers(cmd_buffer, info->indexed,
- info->instance_count > 1, info->indirect,
- info->indirect ? 0 : info->count);
+ radv_emit_draw_registers(cmd_buffer, info);
if (late_scissor_emission)
radv_emit_scissor(cmd_buffer);
radeon_check_space(cmd_buffer->device->ws,
cmd_buffer->cs, 4096);
+ if (likely(!info->indirect)) {
+ /* SI-CI treat instance_count==0 as instance_count==1. There is
+ * no workaround for indirect draws, but we can at least skip
+ * direct draws.
+ */
+ if (unlikely(!info->instance_count))
+ return;
+
+ /* Skip draws with count == 0, unless the vertex count is taken
+ * from a streamout counter buffer.
+ */
+ if (unlikely(!info->count && !info->strmout_buffer))
+ return;
+ }
+
/* Use optimal packet order based on whether we need to sync the
* pipeline.
*/
}
if (loc->sgpr_idx != -1) {
- assert(!loc->indirect);
assert(loc->num_sgprs == 3);
radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier);
-
- radv_cmd_buffer_resolve_subpass(cmd_buffer);
+ radv_cmd_buffer_end_subpass(cmd_buffer);
- for (unsigned i = 0; i < cmd_buffer->state.framebuffer->attachment_count; ++i) {
- VkImageLayout layout = cmd_buffer->state.pass->attachments[i].final_layout;
- radv_handle_subpass_image_transition(cmd_buffer,
- (struct radv_subpass_attachment){i, layout});
- }
+ radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier);
vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
}
+void radv_initialize_fmask(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
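+ /* One clear value per log2(sample count): 1x, 2x, 4x and 8x MSAA. */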
+ static const uint32_t fmask_clear_values[4] = {
+ 0x00000000,
+ 0x02020202,
+ 0xE4E4E4E4,
+ 0x76543210
+ };
+ uint32_t log2_samples = util_logbase2(image->info.samples);
+ uint32_t value = fmask_clear_values[log2_samples];
+
+ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+
+ state->flush_bits |= radv_clear_fmask(cmd_buffer, image, value);
+
+ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+}
+
void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image, uint32_t value)
{
radv_initialise_cmask(cmd_buffer, image, value);
}
+ if (radv_image_has_fmask(image)) {
+ radv_initialize_fmask(cmd_buffer, image);
+ }
+
if (radv_image_has_dcc(image)) {
uint32_t value = 0xffffffffu; /* Fully expanded mode. */
bool need_decompress_pass = false;
!radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) {
radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
}
+
+ if (radv_image_has_fmask(image)) {
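+ /* FMASK has to be expanded before the image can be accessed in
+  * the GENERAL layout.
+  */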
+ if (src_layout != VK_IMAGE_LAYOUT_GENERAL &&
+ dst_layout == VK_IMAGE_LAYOUT_GENERAL) {
+ radv_expand_fmask_image_inplace(cmd_buffer, image, range);
+ }
+ }
}
}
return;
}
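+ /* Nothing to do if the layout doesn't change. */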
+ if (src_layout == dst_layout)
+ return;
+
unsigned src_queue_mask =
radv_image_queue_family_mask(image, src_family,
cmd_buffer->queue_family_index);
uint32_t eventCount;
const VkEvent *pEvents;
VkPipelineStageFlags srcStageMask;
+ VkPipelineStageFlags dstStageMask;
};
static void
image);
}
- radv_stage_flush(cmd_buffer, info->srcStageMask);
+ /* The Vulkan spec 1.1.98 says:
+ *
+ * "An execution dependency with only
+ * VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask
+ * will only prevent that stage from executing in subsequently
+ * submitted commands. As this stage does not perform any actual
+ * execution, this is not observable - in effect, it does not delay
+ * processing of subsequent commands. Similarly an execution dependency
+ * with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask
+ * will effectively not wait for any prior commands to complete."
+ */
+ if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+ radv_stage_flush(cmd_buffer, info->srcStageMask);
cmd_buffer->state.flush_bits |= src_flush_bits;
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
info.eventCount = 0;
info.pEvents = NULL;
info.srcStageMask = srcStageMask;
+ info.dstStageMask = destStageMask;
radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
bufferMemoryBarrierCount, pBufferMemoryBarriers,
if (!(stageMask & ~top_of_pipe_flags)) {
/* Just need to sync the PFP engine. */
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, va);
} else if (!(stageMask & ~post_index_fetch_flags)) {
/* Sync ME because PFP reads index and indirect buffers. */
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, va);
cmd_buffer->device->physical_device->rad_info.chip_class,
radv_cmd_buffer_uses_mec(cmd_buffer),
V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DATA_SEL_VALUE_32BIT, va, 2, value,
+ EOP_DATA_SEL_VALUE_32BIT, va, value,
cmd_buffer->gfx9_eop_bug_va);
}
draw_visible = false;
}
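+ /* Make sure any pending cache flushes are emitted before the
+  * SET_PREDICATION packet.
+  */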
+ si_emit_cache_flush(cmd_buffer);
+
/* Enable predication for this command buffer. */
si_emit_set_predication_state(cmd_buffer, draw_visible, va);
cmd_buffer->state.predicating = true;
S_028B94_STREAMOUT_3_EN(so->streamout_enabled));
radeon_emit(cs, so->hw_enabled_mask &
so->enabled_stream_buffers_mask);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
for_each_bit(i, so->enabled_mask) {
int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx > counterBufferCount)
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
counter_buffer_idx = -1;
/* SI binds streamout buffers as shader resources.
radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+
if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
/* The array of counter buffers is optional. */
RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
for_each_bit(i, so->enabled_mask) {
int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx > counterBufferCount)
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
counter_buffer_idx = -1;
if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
* that the primitives-emitted query won't increment.
*/
radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
radv_set_streamout_enable(cmd_buffer, false);