*/
*num_patches = MIN2(*num_patches, 40);
- /* SI bug workaround - limit LS-HS threadgroups to only one wave. */
if (sctx->b.chip_class == SI) {
+ /* SI bug workaround, related to power management. Limit LS-HS
+ * threadgroups to only one wave.
+ */
unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
*num_patches = MIN2(*num_patches, one_wave);
+
+ if (sctx->screen->b.info.max_se == 1) {
+ /* The VGT HS block increments the patch ID unconditionally
+ * within a single threadgroup. This results in incorrect
+ * patch IDs when instanced draws are used.
+ *
+ * The intended solution is to restrict threadgroups to
+ * a single instance by setting SWITCH_ON_EOI, which
+ * should cause IA to split instances up. However, this
+ * doesn't work correctly on SI when there is no other
+ * SE to switch to.
+ */
+ *num_patches = 1;
+ }
}
sctx->last_num_patches = *num_patches;
if (key->u.uses_tess) {
/* SWITCH_ON_EOI must be set if PrimID is used. */
- if (key->u.tcs_tes_uses_prim_id)
+ if (key->u.tess_uses_prim_id)
ia_switch_on_eoi = true;
/* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
key.u.count_from_stream_output = count_from_so;
key.u.line_stipple_enabled = line_stipple;
key.u.uses_tess = uses_tess;
- key.u.tcs_tes_uses_prim_id = tess_uses_primid;
+ key.u.tess_uses_prim_id = tess_uses_primid;
key.u.uses_gs = uses_gs;
sctx->ia_multi_vgt_param[key.index] =
const struct pipe_draw_info *info)
{
sctx->current_vs_state &= C_VS_STATE_INDEXED;
- sctx->current_vs_state |= S_VS_STATE_INDEXED(!!info->indexed);
+ sctx->current_vs_state |= S_VS_STATE_INDEXED(!!info->index_size);
if (sctx->current_vs_state != sctx->last_vs_state) {
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
static void si_emit_draw_packets(struct si_context *sctx,
const struct pipe_draw_info *info,
- const struct pipe_index_buffer *ib)
+ struct pipe_resource *indexbuf,
+ unsigned index_size,
+ unsigned index_offset)
{
+ struct pipe_draw_indirect_info *indirect = info->indirect;
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
}
/* draw packet */
- if (info->indexed) {
- if (ib->index_size != sctx->last_index_size) {
+ if (index_size) {
+ if (index_size != sctx->last_index_size) {
unsigned index_type;
/* index type */
- switch (ib->index_size) {
+ switch (index_size) {
case 1:
index_type = V_028A7C_VGT_INDEX_8;
break;
radeon_emit(cs, index_type);
}
- sctx->last_index_size = ib->index_size;
+ sctx->last_index_size = index_size;
}
- index_max_size = (ib->buffer->width0 - ib->offset) /
- ib->index_size;
- index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
+ index_max_size = (indexbuf->width0 - index_offset) /
+ index_size;
+ index_va = r600_resource(indexbuf)->gpu_address + index_offset;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- (struct r600_resource *)ib->buffer,
+ (struct r600_resource *)indexbuf,
RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
} else {
/* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE,
sctx->last_index_size = -1;
}
- if (info->indirect) {
- uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
+ if (indirect) {
+ uint64_t indirect_va = r600_resource(indirect->buffer)->gpu_address;
assert(indirect_va % 8 == 0);
radeon_emit(cs, indirect_va >> 32);
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- (struct r600_resource *)info->indirect,
+ (struct r600_resource *)indirect->buffer,
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
- unsigned di_src_sel = info->indexed ? V_0287F0_DI_SRC_SEL_DMA
+ unsigned di_src_sel = index_size ? V_0287F0_DI_SRC_SEL_DMA
: V_0287F0_DI_SRC_SEL_AUTO_INDEX;
- assert(info->indirect_offset % 4 == 0);
+ assert(indirect->offset % 4 == 0);
- if (info->indexed) {
+ if (index_size) {
radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
radeon_emit(cs, index_va);
radeon_emit(cs, index_va >> 32);
}
if (!sctx->screen->has_draw_indirect_multi) {
- radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT
+ radeon_emit(cs, PKT3(index_size ? PKT3_DRAW_INDEX_INDIRECT
: PKT3_DRAW_INDIRECT,
3, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, indirect->offset);
radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, di_src_sel);
} else {
uint64_t count_va = 0;
- if (info->indirect_params) {
+ if (indirect->indirect_draw_count) {
struct r600_resource *params_buf =
- (struct r600_resource *)info->indirect_params;
+ (struct r600_resource *)indirect->indirect_draw_count;
radeon_add_to_buffer_list(
&sctx->b, &sctx->b.gfx, params_buf,
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
- count_va = params_buf->gpu_address + info->indirect_params_offset;
+ count_va = params_buf->gpu_address + indirect->indirect_draw_count_offset;
}
- radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
+ radeon_emit(cs, PKT3(index_size ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
PKT3_DRAW_INDIRECT_MULTI,
8, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, indirect->offset);
radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, ((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) |
S_2C3_DRAW_INDEX_ENABLE(1) |
- S_2C3_COUNT_INDIRECT_ENABLE(!!info->indirect_params));
- radeon_emit(cs, info->indirect_count);
+ S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
+ radeon_emit(cs, indirect->draw_count);
radeon_emit(cs, count_va);
radeon_emit(cs, count_va >> 32);
- radeon_emit(cs, info->indirect_stride);
+ radeon_emit(cs, indirect->stride);
radeon_emit(cs, di_src_sel);
}
} else {
radeon_emit(cs, info->instance_count);
/* Base vertex and start instance. */
- base_vertex = info->indexed ? info->index_bias : info->start;
+ base_vertex = index_size ? info->index_bias : info->start;
if (base_vertex != sctx->last_base_vertex ||
sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN ||
sctx->last_sh_base_reg = sh_base_reg;
}
- if (info->indexed) {
- index_va += info->start * ib->index_size;
+ if (index_size) {
+ index_va += info->start * index_size;
radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, render_cond_bit));
radeon_emit(cs, index_max_size);
const struct pipe_draw_info *info,
unsigned *start, unsigned *count)
{
- if (info->indirect) {
+ struct pipe_draw_indirect_info *indirect = info->indirect;
+
+ if (indirect) {
unsigned indirect_count;
struct pipe_transfer *transfer;
unsigned begin, end;
unsigned map_size;
unsigned *data;
- if (info->indirect_params) {
+ if (indirect->indirect_draw_count) {
data = pipe_buffer_map_range(&sctx->b.b,
- info->indirect_params,
- info->indirect_params_offset,
+ indirect->indirect_draw_count,
+ indirect->indirect_draw_count_offset,
sizeof(unsigned),
PIPE_TRANSFER_READ, &transfer);
pipe_buffer_unmap(&sctx->b.b, transfer);
} else {
- indirect_count = info->indirect_count;
+ indirect_count = indirect->draw_count;
}
if (!indirect_count) {
return;
}
- map_size = (indirect_count - 1) * info->indirect_stride + 3 * sizeof(unsigned);
- data = pipe_buffer_map_range(&sctx->b.b, info->indirect,
- info->indirect_offset, map_size,
+ map_size = (indirect_count - 1) * indirect->stride + 3 * sizeof(unsigned);
+ data = pipe_buffer_map_range(&sctx->b.b, indirect->buffer,
+ indirect->offset, map_size,
PIPE_TRANSFER_READ, &transfer);
begin = UINT_MAX;
end = MAX2(end, start + count);
}
- data += info->indirect_stride / sizeof(unsigned);
+ data += indirect->stride / sizeof(unsigned);
}
pipe_buffer_unmap(&sctx->b.b, transfer);
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
- const struct pipe_index_buffer *ib = &sctx->index_buffer;
- struct pipe_index_buffer ib_tmp; /* for index buffer uploads only */
+ struct pipe_resource *indexbuf = info->index.resource;
unsigned mask, dirty_tex_counter;
enum pipe_prim_type rast_prim;
unsigned num_patches = 0;
+ unsigned index_size = info->index_size;
+ unsigned index_offset = info->indirect ? info->start * index_size : 0;
if (likely(!info->indirect)) {
/* SI-CI treat instance_count==0 as instance_count==1. There is
/* Handle count == 0. */
if (unlikely(!info->count &&
- (info->indexed || !info->count_from_stream_output)))
+ (index_size || !info->count_from_stream_output)))
return;
}
if (!si_upload_graphics_shader_descriptors(sctx))
return;
- ib_tmp.buffer = NULL;
-
- if (info->indexed) {
+ if (index_size) {
/* Translate or upload, if needed. */
/* 8-bit indices are supported on VI. */
- if (sctx->b.chip_class <= CIK && ib->index_size == 1) {
- unsigned start, count, start_offset, size;
+ if (sctx->b.chip_class <= CIK && index_size == 1) {
+ unsigned start, count, start_offset, size, offset;
void *ptr;
si_get_draw_start_count(sctx, info, &start, &count);
start_offset = start * 2;
size = count * 2;
+ indexbuf = NULL;
u_upload_alloc(ctx->stream_uploader, start_offset,
size,
si_optimal_tcc_alignment(sctx, size),
- &ib_tmp.offset, &ib_tmp.buffer, &ptr);
- if (!ib_tmp.buffer)
+ &offset, &indexbuf, &ptr);
+ if (!indexbuf)
return;
- util_shorten_ubyte_elts_to_userptr(&sctx->b.b, ib, 0, 0,
- ib->offset + start,
+ util_shorten_ubyte_elts_to_userptr(&sctx->b.b, info, 0, 0,
+ index_offset + start,
count, ptr);
/* info->start will be added by the drawing code */
- ib_tmp.offset -= start_offset;
- ib_tmp.index_size = 2;
- ib = &ib_tmp;
- } else if (ib->user_buffer && !ib->buffer) {
+ index_offset = offset - start_offset;
+ index_size = 2;
+ } else if (info->has_user_indices) {
unsigned start_offset;
assert(!info->indirect);
- start_offset = info->start * ib->index_size;
+ start_offset = info->start * index_size;
+ indexbuf = NULL;
u_upload_data(ctx->stream_uploader, start_offset,
- info->count * ib->index_size,
+ info->count * index_size,
sctx->screen->b.info.tcc_cache_line_size,
- (char*)ib->user_buffer + start_offset,
- &ib_tmp.offset, &ib_tmp.buffer);
- if (!ib_tmp.buffer)
+ (char*)info->index.user + start_offset,
+ &index_offset, &indexbuf);
+ if (!indexbuf)
return;
/* info->start will be added by the drawing code */
- ib_tmp.offset -= start_offset;
- ib_tmp.index_size = ib->index_size;
- ib = &ib_tmp;
+ index_offset -= start_offset;
} else if (sctx->b.chip_class <= CIK &&
- r600_resource(ib->buffer)->TC_L2_dirty) {
+ r600_resource(indexbuf)->TC_L2_dirty) {
/* VI reads index buffers through TC L2, so it doesn't
* need this. */
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(ib->buffer)->TC_L2_dirty = false;
+ r600_resource(indexbuf)->TC_L2_dirty = false;
}
}
if (info->indirect) {
+ struct pipe_draw_indirect_info *indirect = info->indirect;
+
/* Add the buffer size for memory checking in need_cs_space. */
- r600_context_add_resource_size(ctx, info->indirect);
+ r600_context_add_resource_size(ctx, indirect->buffer);
- if (r600_resource(info->indirect)->TC_L2_dirty) {
+ if (r600_resource(indirect->buffer)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(info->indirect)->TC_L2_dirty = false;
+ r600_resource(indirect->buffer)->TC_L2_dirty = false;
}
- if (info->indirect_params &&
- r600_resource(info->indirect_params)->TC_L2_dirty) {
+ if (indirect->indirect_draw_count &&
+ r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(info->indirect_params)->TC_L2_dirty = false;
+ r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
}
}
si_emit_draw_registers(sctx, info, num_patches);
si_ce_pre_draw_synchronization(sctx);
- si_emit_draw_packets(sctx, info, ib);
+ si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
si_ce_post_draw_synchronization(sctx);
if (sctx->trace_buf)
sctx->framebuffer.do_update_surf_dirtiness = false;
}
- pipe_resource_reference(&ib_tmp.buffer, NULL);
sctx->b.num_draw_calls++;
if (info->primitive_restart)
sctx->b.num_prim_restart_calls++;
if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
sctx->b.num_spill_draw_calls++;
+ if (index_size && indexbuf != info->index.resource)
+ pipe_resource_reference(&indexbuf, NULL);
}
void si_trace_emit(struct si_context *sctx)