unsigned *num_patches)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
- struct si_shader_ctx_state *ls = &sctx->vs_shader;
+ struct si_shader *ls_current;
+ struct si_shader_selector *ls;
/* The TES pointer will only be used for sctx->last_tcs.
* It would be wrong to think that TCS = TES. */
struct si_shader_selector *tcs =
unsigned num_tcs_patch_outputs;
unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
unsigned input_patch_size, output_patch_size, output_patch0_offset;
- unsigned perpatch_output_offset, lds_size, ls_rsrc2;
+ unsigned perpatch_output_offset, lds_size;
unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
unsigned offchip_layout, hardware_lds_size, ls_hs_config;
- if (sctx->last_ls == ls->current &&
+ /* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */
+ if (sctx->b.chip_class >= GFX9) {
+ if (sctx->tcs_shader.cso)
+ ls_current = sctx->tcs_shader.current;
+ else
+ ls_current = sctx->fixed_func_tcs_shader.current;
+
+ ls = ls_current->key.part.tcs.ls;
+ } else {
+ ls_current = sctx->vs_shader.current;
+ ls = sctx->vs_shader.cso;
+ }
+
+ if (sctx->last_ls == ls_current &&
sctx->last_tcs == tcs &&
sctx->last_tes_sh_base == tes_sh_base &&
sctx->last_num_tcs_input_cp == num_tcs_input_cp) {
return;
}
- sctx->last_ls = ls->current;
+ sctx->last_ls = ls_current;
sctx->last_tcs = tcs;
sctx->last_tes_sh_base = tes_sh_base;
sctx->last_num_tcs_input_cp = num_tcs_input_cp;
/* This calculates how shader inputs and outputs among VS, TCS, and TES
* are laid out in LDS. */
- num_tcs_inputs = util_last_bit64(ls->cso->outputs_written);
+ num_tcs_inputs = util_last_bit64(ls->outputs_written);
if (sctx->tcs_shader.cso) {
num_tcs_outputs = util_last_bit64(tcs->outputs_written);
*/
*num_patches = MIN2(*num_patches, 40);
- /* SI bug workaround - limit LS-HS threadgroups to only one wave. */
if (sctx->b.chip_class == SI) {
+ /* SI bug workaround, related to power management. Limit LS-HS
+ * threadgroups to only one wave.
+ */
unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
*num_patches = MIN2(*num_patches, one_wave);
+
+ if (sctx->screen->b.info.max_se == 1) {
+ /* The VGT HS block increments the patch ID unconditionally
+ * within a single threadgroup. This results in incorrect
+ * patch IDs when instanced draws are used.
+ *
+ * The intended solution is to restrict threadgroups to
+ * a single instance by setting SWITCH_ON_EOI, which
+ * should cause IA to split instances up. However, this
+ * doesn't work correctly on SI when there is no other
+ * SE to switch to.
+ */
+ *num_patches = 1;
+ }
}
sctx->last_num_patches = *num_patches;
output_patch0_offset = input_patch_size * *num_patches;
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
- lds_size = output_patch0_offset + output_patch_size * *num_patches;
- ls_rsrc2 = ls->current->config.rsrc2;
-
- if (sctx->b.chip_class >= CIK) {
- assert(lds_size <= 65536);
- lds_size = align(lds_size, 512) / 512;
- } else {
- assert(lds_size <= 32768);
- lds_size = align(lds_size, 256) / 256;
- }
- si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
- ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
-
- /* Due to a hw bug, RSRC2_LS must be written twice with another
- * LS register written in between. */
- if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
- radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
- radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
- radeon_emit(cs, ls->current->config.rsrc1);
- radeon_emit(cs, ls_rsrc2);
-
/* Compute userdata SGPRs. */
assert(((input_vertex_size / 4) & ~0xff) == 0);
assert(((output_vertex_size / 4) & ~0xff) == 0);
((output_vertex_size / 4) << 13);
tcs_out_offsets = (output_patch0_offset / 16) |
((perpatch_output_offset / 16) << 16);
- offchip_layout = (pervertex_output_patch_size * *num_patches << 16) |
- (num_tcs_output_cp << 9) | *num_patches;
+ offchip_layout = *num_patches |
+ (num_tcs_output_cp << 6) |
+ (pervertex_output_patch_size * *num_patches << 12);
- /* Set them for LS. */
- radeon_set_sh_reg(cs,
- R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4,
- tcs_in_layout);
+ /* Compute the LDS size. */
+ lds_size = output_patch0_offset + output_patch_size * *num_patches;
- /* Set them for TCS. */
- radeon_set_sh_reg_seq(cs,
- R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
- radeon_emit(cs, offchip_layout);
- radeon_emit(cs, tcs_out_offsets);
- radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
- radeon_emit(cs, tcs_in_layout);
+ if (sctx->b.chip_class >= CIK) {
+ assert(lds_size <= 65536);
+ lds_size = align(lds_size, 512) / 512;
+ } else {
+ assert(lds_size <= 32768);
+ lds_size = align(lds_size, 256) / 256;
+ }
+
+ /* Set SI_SGPR_VS_STATE_BITS. */
+ sctx->current_vs_state &= C_VS_STATE_LS_OUT_PATCH_SIZE &
+ C_VS_STATE_LS_OUT_VERTEX_SIZE;
+ sctx->current_vs_state |= tcs_in_layout;
+
+ if (sctx->b.chip_class >= GFX9) {
+ unsigned hs_rsrc2 = ls_current->config.rsrc2 |
+ S_00B42C_LDS_SIZE(lds_size);
+
+ radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
+
+ /* Set userdata SGPRs for merged LS-HS. */
+ radeon_set_sh_reg_seq(cs,
+ R_00B430_SPI_SHADER_USER_DATA_LS_0 +
+ GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3);
+ radeon_emit(cs, offchip_layout);
+ radeon_emit(cs, tcs_out_offsets);
+ radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
+ } else {
+ unsigned ls_rsrc2 = ls_current->config.rsrc2;
+
+ si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
+ ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
+
+ /* Due to a hw bug, RSRC2_LS must be written twice with another
+ * LS register written in between. */
+ if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
+ radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
+ radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+ radeon_emit(cs, ls_current->config.rsrc1);
+ radeon_emit(cs, ls_rsrc2);
+
+ /* Set userdata SGPRs for TCS. */
+ radeon_set_sh_reg_seq(cs,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
+ radeon_emit(cs, offchip_layout);
+ radeon_emit(cs, tcs_out_offsets);
+ radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
+ radeon_emit(cs, tcs_in_layout);
+ }
- /* Set them for TES. */
- radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 1);
+ /* Set userdata SGPRs for TES. */
+ radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2);
radeon_emit(cs, offchip_layout);
+ radeon_emit(cs, r600_resource(sctx->tess_offchip_ring)->gpu_address >> 16);
ls_hs_config = S_028B58_NUM_PATCHES(*num_patches) |
S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
if (sscreen->has_distributed_tess) {
if (key->u.uses_gs) {
- partial_es_wave = true;
+ if (sscreen->b.chip_class <= VI)
+ partial_es_wave = true;
/* GPU hang workaround. */
if (sscreen->b.family == CHIP_TONGA ||
sscreen->b.family == CHIP_FIJI ||
sscreen->b.family == CHIP_POLARIS10 ||
- sscreen->b.family == CHIP_POLARIS11)
+ sscreen->b.family == CHIP_POLARIS11 ||
+ sscreen->b.family == CHIP_POLARIS12)
partial_vs_wave = true;
} else {
partial_vs_wave = true;
}
/* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
- if (ia_switch_on_eoi)
+ if (sscreen->b.chip_class <= VI && ia_switch_on_eoi)
partial_es_wave = true;
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_WD_SWITCH_ON_EOP(sscreen->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
- S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->b.chip_class >= VI ?
+ /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->b.chip_class == VI ?
max_primgroup_in_wave : 0) |
S_030960_EN_INST_OPT_BASIC(sscreen->b.chip_class >= GFX9) |
S_030960_EN_INST_OPT_ADV(sscreen->b.chip_class >= GFX9);
if (sctx->gs_shader.cso) {
/* GS requirement. */
- if (SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3)
+ if (sctx->b.chip_class <= VI &&
+ SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3)
ia_multi_vgt_param |= S_028AA8_PARTIAL_ES_WAVE_ON(1);
/* GS hw bug with single-primitive instances and SWITCH_ON_EOI.
sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
}
-static void si_emit_vs_state(struct si_context *sctx)
+static void si_emit_vs_state(struct si_context *sctx,
+ const struct pipe_draw_info *info)
{
+ sctx->current_vs_state &= C_VS_STATE_INDEXED;
+ sctx->current_vs_state |= S_VS_STATE_INDEXED(!!info->index_size);
+
if (sctx->current_vs_state != sctx->last_vs_state) {
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
radeon_set_sh_reg(cs,
- R_00B130_SPI_SHADER_USER_DATA_VS_0 + SI_SGPR_VS_STATE_BITS * 4,
+ sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX] +
+ SI_SGPR_VS_STATE_BITS * 4,
sctx->current_vs_state);
sctx->last_vs_state = sctx->current_vs_state;
static void si_emit_draw_packets(struct si_context *sctx,
const struct pipe_draw_info *info,
- const struct pipe_index_buffer *ib)
+ struct pipe_resource *indexbuf,
+ unsigned index_size,
+ unsigned index_offset)
{
+ struct pipe_draw_indirect_info *indirect = info->indirect;
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
}
/* draw packet */
- if (info->indexed) {
- if (ib->index_size != sctx->last_index_size) {
+ if (index_size) {
+ if (index_size != sctx->last_index_size) {
unsigned index_type;
/* index type */
- switch (ib->index_size) {
+ switch (index_size) {
case 1:
index_type = V_028A7C_VGT_INDEX_8;
break;
radeon_emit(cs, index_type);
}
- sctx->last_index_size = ib->index_size;
+ sctx->last_index_size = index_size;
}
- index_max_size = (ib->buffer->width0 - ib->offset) /
- ib->index_size;
- index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
+ index_max_size = (indexbuf->width0 - index_offset) /
+ index_size;
+ index_va = r600_resource(indexbuf)->gpu_address + index_offset;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- (struct r600_resource *)ib->buffer,
+ (struct r600_resource *)indexbuf,
RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
} else {
/* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE,
sctx->last_index_size = -1;
}
- if (!info->indirect) {
- int base_vertex;
-
- radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
- radeon_emit(cs, info->instance_count);
-
- /* Base vertex and start instance. */
- base_vertex = info->indexed ? info->index_bias : info->start;
-
- if (base_vertex != sctx->last_base_vertex ||
- sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN ||
- info->start_instance != sctx->last_start_instance ||
- info->drawid != sctx->last_drawid ||
- sh_base_reg != sctx->last_sh_base_reg) {
- radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 3);
- radeon_emit(cs, base_vertex);
- radeon_emit(cs, info->start_instance);
- radeon_emit(cs, info->drawid);
-
- sctx->last_base_vertex = base_vertex;
- sctx->last_start_instance = info->start_instance;
- sctx->last_drawid = info->drawid;
- sctx->last_sh_base_reg = sh_base_reg;
- }
- } else {
- uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
+ if (indirect) {
+ uint64_t indirect_va = r600_resource(indirect->buffer)->gpu_address;
assert(indirect_va % 8 == 0);
radeon_emit(cs, indirect_va >> 32);
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- (struct r600_resource *)info->indirect,
+ (struct r600_resource *)indirect->buffer,
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
- }
- if (info->indirect) {
- unsigned di_src_sel = info->indexed ? V_0287F0_DI_SRC_SEL_DMA
+ unsigned di_src_sel = index_size ? V_0287F0_DI_SRC_SEL_DMA
: V_0287F0_DI_SRC_SEL_AUTO_INDEX;
- assert(info->indirect_offset % 4 == 0);
+ assert(indirect->offset % 4 == 0);
- if (info->indexed) {
+ if (index_size) {
radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
radeon_emit(cs, index_va);
radeon_emit(cs, index_va >> 32);
}
if (!sctx->screen->has_draw_indirect_multi) {
- radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT
+ radeon_emit(cs, PKT3(index_size ? PKT3_DRAW_INDEX_INDIRECT
: PKT3_DRAW_INDIRECT,
3, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, indirect->offset);
radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, di_src_sel);
} else {
uint64_t count_va = 0;
- if (info->indirect_params) {
+ if (indirect->indirect_draw_count) {
struct r600_resource *params_buf =
- (struct r600_resource *)info->indirect_params;
+ (struct r600_resource *)indirect->indirect_draw_count;
radeon_add_to_buffer_list(
&sctx->b, &sctx->b.gfx, params_buf,
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
- count_va = params_buf->gpu_address + info->indirect_params_offset;
+ count_va = params_buf->gpu_address + indirect->indirect_draw_count_offset;
}
- radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
+ radeon_emit(cs, PKT3(index_size ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
PKT3_DRAW_INDIRECT_MULTI,
8, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, indirect->offset);
radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, ((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) |
S_2C3_DRAW_INDEX_ENABLE(1) |
- S_2C3_COUNT_INDIRECT_ENABLE(!!info->indirect_params));
- radeon_emit(cs, info->indirect_count);
+ S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
+ radeon_emit(cs, indirect->draw_count);
radeon_emit(cs, count_va);
radeon_emit(cs, count_va >> 32);
- radeon_emit(cs, info->indirect_stride);
+ radeon_emit(cs, indirect->stride);
radeon_emit(cs, di_src_sel);
}
} else {
- if (info->indexed) {
- index_va += info->start * ib->index_size;
+ int base_vertex;
+
+ radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
+ radeon_emit(cs, info->instance_count);
+
+ /* Base vertex and start instance. */
+ base_vertex = index_size ? info->index_bias : info->start;
+
+ if (base_vertex != sctx->last_base_vertex ||
+ sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN ||
+ info->start_instance != sctx->last_start_instance ||
+ info->drawid != sctx->last_drawid ||
+ sh_base_reg != sctx->last_sh_base_reg) {
+ radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 3);
+ radeon_emit(cs, base_vertex);
+ radeon_emit(cs, info->start_instance);
+ radeon_emit(cs, info->drawid);
+
+ sctx->last_base_vertex = base_vertex;
+ sctx->last_start_instance = info->start_instance;
+ sctx->last_drawid = info->drawid;
+ sctx->last_sh_base_reg = sh_base_reg;
+ }
+
+ if (index_size) {
+ index_va += info->start * index_size;
radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, render_cond_bit));
radeon_emit(cs, index_max_size);
const struct pipe_draw_info *info,
unsigned *start, unsigned *count)
{
- if (info->indirect) {
+ struct pipe_draw_indirect_info *indirect = info->indirect;
+
+ if (indirect) {
unsigned indirect_count;
struct pipe_transfer *transfer;
unsigned begin, end;
unsigned map_size;
unsigned *data;
- if (info->indirect_params) {
+ if (indirect->indirect_draw_count) {
data = pipe_buffer_map_range(&sctx->b.b,
- info->indirect_params,
- info->indirect_params_offset,
+ indirect->indirect_draw_count,
+ indirect->indirect_draw_count_offset,
sizeof(unsigned),
PIPE_TRANSFER_READ, &transfer);
pipe_buffer_unmap(&sctx->b.b, transfer);
} else {
- indirect_count = info->indirect_count;
+ indirect_count = indirect->draw_count;
}
if (!indirect_count) {
return;
}
- map_size = (indirect_count - 1) * info->indirect_stride + 3 * sizeof(unsigned);
- data = pipe_buffer_map_range(&sctx->b.b, info->indirect,
- info->indirect_offset, map_size,
+ map_size = (indirect_count - 1) * indirect->stride + 3 * sizeof(unsigned);
+ data = pipe_buffer_map_range(&sctx->b.b, indirect->buffer,
+ indirect->offset, map_size,
PIPE_TRANSFER_READ, &transfer);
begin = UINT_MAX;
end = MAX2(end, start + count);
}
- data += info->indirect_stride / sizeof(unsigned);
+ data += indirect->stride / sizeof(unsigned);
}
pipe_buffer_unmap(&sctx->b.b, transfer);
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
- const struct pipe_index_buffer *ib = &sctx->index_buffer;
- struct pipe_index_buffer ib_tmp; /* for index buffer uploads only */
+ struct pipe_resource *indexbuf = info->index.resource;
unsigned mask, dirty_tex_counter;
enum pipe_prim_type rast_prim;
unsigned num_patches = 0;
+ unsigned index_size = info->index_size;
+ unsigned index_offset = info->indirect ? info->start * index_size : 0;
if (likely(!info->indirect)) {
/* SI-CI treat instance_count==0 as instance_count==1. There is
/* Handle count == 0. */
if (unlikely(!info->count &&
- (info->indexed || !info->count_from_stream_output)))
+ (index_size || !info->count_from_stream_output)))
return;
}
if (!si_upload_graphics_shader_descriptors(sctx))
return;
- ib_tmp.buffer = NULL;
-
- if (info->indexed) {
+ if (index_size) {
/* Translate or upload, if needed. */
/* 8-bit indices are supported on VI. */
- if (sctx->b.chip_class <= CIK && ib->index_size == 1) {
- unsigned start, count, start_offset, size;
+ if (sctx->b.chip_class <= CIK && index_size == 1) {
+ unsigned start, count, start_offset, size, offset;
void *ptr;
si_get_draw_start_count(sctx, info, &start, &count);
start_offset = start * 2;
size = count * 2;
+ indexbuf = NULL;
u_upload_alloc(ctx->stream_uploader, start_offset,
size,
si_optimal_tcc_alignment(sctx, size),
- &ib_tmp.offset, &ib_tmp.buffer, &ptr);
- if (!ib_tmp.buffer)
+ &offset, &indexbuf, &ptr);
+ if (!indexbuf)
return;
- util_shorten_ubyte_elts_to_userptr(&sctx->b.b, ib, 0, 0,
- ib->offset + start,
+ util_shorten_ubyte_elts_to_userptr(&sctx->b.b, info, 0, 0,
+ index_offset + start,
count, ptr);
/* info->start will be added by the drawing code */
- ib_tmp.offset -= start_offset;
- ib_tmp.index_size = 2;
- ib = &ib_tmp;
- } else if (ib->user_buffer && !ib->buffer) {
- unsigned start, count, start_offset;
+ index_offset = offset - start_offset;
+ index_size = 2;
+ } else if (info->has_user_indices) {
+ unsigned start_offset;
- si_get_draw_start_count(sctx, info, &start, &count);
- start_offset = start * ib->index_size;
+ assert(!info->indirect);
+ start_offset = info->start * index_size;
+ indexbuf = NULL;
u_upload_data(ctx->stream_uploader, start_offset,
- count * ib->index_size,
+ info->count * index_size,
sctx->screen->b.info.tcc_cache_line_size,
- (char*)ib->user_buffer + start_offset,
- &ib_tmp.offset, &ib_tmp.buffer);
- if (!ib_tmp.buffer)
+ (char*)info->index.user + start_offset,
+ &index_offset, &indexbuf);
+ if (!indexbuf)
return;
/* info->start will be added by the drawing code */
- ib_tmp.offset -= start_offset;
- ib_tmp.index_size = ib->index_size;
- ib = &ib_tmp;
+ index_offset -= start_offset;
} else if (sctx->b.chip_class <= CIK &&
- r600_resource(ib->buffer)->TC_L2_dirty) {
+ r600_resource(indexbuf)->TC_L2_dirty) {
/* VI reads index buffers through TC L2, so it doesn't
* need this. */
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(ib->buffer)->TC_L2_dirty = false;
+ r600_resource(indexbuf)->TC_L2_dirty = false;
}
}
if (info->indirect) {
+ struct pipe_draw_indirect_info *indirect = info->indirect;
+
/* Add the buffer size for memory checking in need_cs_space. */
- r600_context_add_resource_size(ctx, info->indirect);
+ r600_context_add_resource_size(ctx, indirect->buffer);
- if (r600_resource(info->indirect)->TC_L2_dirty) {
+ if (r600_resource(indirect->buffer)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(info->indirect)->TC_L2_dirty = false;
+ r600_resource(indirect->buffer)->TC_L2_dirty = false;
}
- if (info->indirect_params &&
- r600_resource(info->indirect_params)->TC_L2_dirty) {
+ if (indirect->indirect_draw_count &&
+ r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(info->indirect_params)->TC_L2_dirty = false;
+ r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
}
}
si_emit_rasterizer_prim_state(sctx);
if (sctx->tes_shader.cso)
si_emit_derived_tess_state(sctx, info, &num_patches);
- si_emit_vs_state(sctx);
+ si_emit_vs_state(sctx, info);
si_emit_draw_registers(sctx, info, num_patches);
si_ce_pre_draw_synchronization(sctx);
- si_emit_draw_packets(sctx, info, ib);
+ si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
si_ce_post_draw_synchronization(sctx);
if (sctx->trace_buf)
sctx->framebuffer.do_update_surf_dirtiness = false;
}
- pipe_resource_reference(&ib_tmp.buffer, NULL);
sctx->b.num_draw_calls++;
+ if (info->primitive_restart)
+ sctx->b.num_prim_restart_calls++;
if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
sctx->b.num_spill_draw_calls++;
+ if (index_size && indexbuf != info->index.resource)
+ pipe_resource_reference(&indexbuf, NULL);
}
void si_trace_emit(struct si_context *sctx)