shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
S_00B528_SGPRS((num_sgprs - 1) / 8) |
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt);
- shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs);
+ shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
+ S_00B52C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0);
}
static void si_shader_hs(struct si_shader *shader)
S_00B428_VGPRS((shader->num_vgprs - 1) / 4) |
S_00B428_SGPRS((num_sgprs - 1) / 8));
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
- S_00B42C_USER_SGPR(num_user_sgprs));
+ S_00B42C_USER_SGPR(num_user_sgprs) |
+ S_00B42C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
}
static void si_shader_es(struct si_shader *shader)
si_set_tesseval_regs(shader, pm4);
}
+static unsigned si_gs_get_max_stream(struct si_shader *shader)
+{ /* Returns the largest 'stream' value found in shader->selector->so, or 0 when it has no outputs. */
+ struct pipe_stream_output_info *so = &shader->selector->so;
+ unsigned max_stream = 0, i;
+ /* Shortcut for the empty case; the loop below would leave max_stream at 0 as well. */
+ if (so->num_outputs == 0)
+ return 0;
+ /* Linear scan that keeps the highest stream value seen so far. */
+ for (i = 0; i < so->num_outputs; i++) {
+ if (so->output[i].stream > max_stream)
+ max_stream = so->output[i].stream;
+ }
+ return max_stream;
+}
+
static void si_shader_gs(struct si_shader *shader)
{
- unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 >> 2);
+ unsigned gs_vert_itemsize = shader->selector->info.num_outputs * 16;
unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
- unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+ unsigned gsvs_itemsize = (gs_vert_itemsize * gs_max_vert_out) >> 2;
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
unsigned cut_mode;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
uint64_t va;
+ unsigned max_stream = si_gs_get_max_stream(shader);
/* The GSVS_RING_ITEMSIZE register takes 15 bits */
assert(gsvs_itemsize < (1 << 15));
S_028A40_GS_WRITE_OPTIMIZE(1));
si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
- si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize);
- si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);
+ si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1));
+ si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * ((max_stream >= 3) ? 3 : 1));
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
util_bitcount64(shader->selector->inputs_read) * (16 >> 2));
- si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
+ si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
- si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);
+ si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize >> 2);
+ si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, (max_stream >= 1) ? gs_vert_itemsize >> 2 : 0);
+ si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, (max_stream >= 2) ? gs_vert_itemsize >> 2 : 0);
+ si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, (max_stream >= 3) ? gs_vert_itemsize >> 2 : 0);
si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
uint64_t va;
unsigned window_space =
shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+ bool enable_prim_id = si_vs_exports_prim_id(shader);
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
if (pm4 == NULL)
return;
+ /* If this is the GS copy shader, the GS state writes this register.
+ * Otherwise, the VS state writes it.
+ */
+ if (!shader->is_gs_copy_shader) {
+ si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
+ S_028A40_MODE(enable_prim_id ? V_028A40_GS_SCENARIO_A : 0));
+ si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, enable_prim_id);
+ } else
+ si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0);
+
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ vgpr_comp_cnt = shader->uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
num_user_sgprs = SI_VS_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = 3; /* all components are needed for TES */
key->vs.as_es = 1;
key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
}
+
+ if (!sctx->gs_shader && sctx->ps_shader &&
+ sctx->ps_shader->info.uses_primid)
+ key->vs.export_prim_id = 1;
break;
case PIPE_SHADER_TESS_CTRL:
key->tcs.prim_mode =
if (sctx->gs_shader) {
key->tes.as_es = 1;
key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
- }
+ } else if (sctx->ps_shader && sctx->ps_shader->info.uses_primid)
+ key->tes.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
break;
}
si_shader_init_pm4_state(shader);
sel->num_shaders++;
+ p_atomic_inc(&sctx->screen->b.num_compilations);
}
return 0;
sel->tokens = tgsi_dup_tokens(state->tokens);
sel->so = state->stream_output;
tgsi_scan_shader(state->tokens, &sel->info);
+ p_atomic_inc(&sscreen->b.num_shaders_created);
switch (pipe_shader_type) {
case PIPE_SHADER_GEOMETRY:
return;
sctx->vs_shader = sel;
- sctx->clip_regs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
}
static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
return;
sctx->gs_shader = sel;
- sctx->clip_regs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
if (enable_changed)
return;
sctx->tes_shader = sel;
- sctx->clip_regs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
if (enable_changed) {
}
}
- if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
+ if (name == TGSI_SEMANTIC_PRIMID)
+ /* PrimID is written after the last output. */
+ tmp |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
+ else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
/* No corresponding output found, load defaults into input.
* Don't set any other bits.
* (FLAT_SHADE=1 completely changes behavior) */
static void si_init_gs_rings(struct si_context *sctx)
{
unsigned esgs_ring_size = 128 * 1024;
- unsigned gsvs_ring_size = 64 * 1024 * 1024;
+ unsigned gsvs_ring_size = 60 * 1024 * 1024;
assert(!sctx->gs_rings);
sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
PIPE_USAGE_DEFAULT, gsvs_ring_size);
if (sctx->b.chip_class >= CIK) {
+ if (sctx->b.chip_class >= VI) {
+ /* The maximum sizes are 63.999 MB on VI, because
+ * the register fields only have 18 bits. */
+ assert(esgs_ring_size / 256 < (1 << 18));
+ assert(gsvs_ring_size / 256 < (1 << 18));
+ }
si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
esgs_ring_size / 256);
si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
sctx->esgs_ring, 0, esgs_ring_size,
- true, true, 4, 64);
+ true, true, 4, 64, 0);
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
sctx->esgs_ring, 0, esgs_ring_size,
- false, false, 0, 0);
+ false, false, 0, 0, 0);
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
sctx->gsvs_ring, 0, gsvs_ring_size,
- false, false, 0, 0);
+ false, false, 0, 0, 0);
}
+static void si_update_gs_rings(struct si_context *sctx)
+{ /* Binds sctx->gsvs_ring to the four geometry-shader ring slots SI_RING_GSVS and SI_RING_GSVS_1..3. */
+ unsigned gs_vert_itemsize = sctx->gs_shader->info.num_outputs * 16; /* 16 per GS output; presumably bytes of one vec4 -- TODO confirm */
+ unsigned gs_max_vert_out = sctx->gs_shader->gs_max_out_vertices;
+ unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+ uint64_t offset;
+ /* Slot 0: the trailing argument is 0 here and 'offset' in the calls below; the other si_set_ring_buffer() arguments are identical for all four slots. */
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
+ sctx->gsvs_ring, gsvs_itemsize,
+ 64, true, true, 4, 16, 0);
+ /* Slot k passes offset = k * gsvs_itemsize * 64. NOTE(review): the product is evaluated as unsigned before being stored in the uint64_t. */
+ offset = gsvs_itemsize * 64;
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_1,
+ sctx->gsvs_ring, gsvs_itemsize,
+ 64, true, true, 4, 16, offset);
+ /* Slot 2. */
+ offset = (gsvs_itemsize * 2) * 64;
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_2,
+ sctx->gsvs_ring, gsvs_itemsize,
+ 64, true, true, 4, 16, offset);
+ /* Slot 3. */
+ offset = (gsvs_itemsize * 3) * 64;
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_3,
+ sctx->gsvs_ring, gsvs_itemsize,
+ 64, true, true, 4, 16, offset);
+
+}
/**
* @returns 1 if \p sel has been updated to use a new scratch buffer and 0
* otherwise.
static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
{
-
- return MAX3(si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader),
- si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader),
- si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
+ unsigned bytes = 0;
+ /* Running maximum of si_get_scratch_buffer_bytes_per_wave() over the PS, GS, VS, TCS and TES selectors; TCS and TES are new relative to the removed MAX3 expression. */
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->tcs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->tes_shader));
+ return bytes;
}
static void si_update_spi_tmpring_size(struct si_context *sctx)
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
if (si_update_scratch_buffer(sctx, sctx->gs_shader))
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
+ if (si_update_scratch_buffer(sctx, sctx->tcs_shader))
+ si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
- /* VS can be bound as ES or VS. */
- if (sctx->gs_shader) {
+ /* VS can be bound as LS, ES, or VS. */
+ if (sctx->tes_shader) {
+ if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+ si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
+ } else if (sctx->gs_shader) {
if (si_update_scratch_buffer(sctx, sctx->vs_shader))
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
} else {
if (si_update_scratch_buffer(sctx, sctx->vs_shader))
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
}
+
+ /* TES can be bound as ES or VS. */
+ if (sctx->gs_shader) {
+ if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+ si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+ } else {
+ if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+ si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
+ }
}
/* The LLVM shader backend should be reporting aligned scratch_sizes. */
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL,
SI_RING_TESS_FACTOR, sctx->tf_ring, 0,
- sctx->tf_ring->width0, false, false, 0, 0);
+ sctx->tf_ring->width0, false, false, 0, 0, 0);
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
}
}
si_pm4_set_reg(*pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
- if (!sctx->gs_shader)
- si_pm4_set_reg(*pm4, R_028A40_VGT_GS_MODE, 0);
}
si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
}
+static void si_update_so(struct si_context *sctx, struct si_shader_selector *shader)
+{ /* Refreshes sctx->b.streamout (enabled_stream_buffers_mask and stride_in_dw) from shader->so. */
+ struct pipe_stream_output_info *so = &shader->so;
+ uint32_t enabled_stream_buffers_mask = 0;
+ int i;
+ /* Each output sets bit (output_buffer + stream * 4); this reads as 4 bits per stream, assuming output_buffer < 4 -- TODO confirm. */
+ for (i = 0; i < so->num_outputs; i++)
+ enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << (so->output[i].stream * 4);
+ sctx->b.streamout.enabled_stream_buffers_mask = enabled_stream_buffers_mask;
+ sctx->b.streamout.stride_in_dw = shader->so.stride;
+}
+
void si_update_shaders(struct si_context *sctx)
{
struct pipe_context *ctx = (struct pipe_context*)sctx;
} else {
/* TES as VS */
si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
- sctx->b.streamout.stride_in_dw = sctx->tes_shader->so.stride;
+ si_update_so(sctx, sctx->tes_shader);
}
} else if (sctx->gs_shader) {
/* VS as ES */
/* VS as VS */
si_shader_select(ctx, sctx->vs_shader);
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
- sctx->b.streamout.stride_in_dw = sctx->vs_shader->so.stride;
+ si_update_so(sctx, sctx->vs_shader);
}
/* Update GS. */
si_shader_select(ctx, sctx->gs_shader);
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
- sctx->b.streamout.stride_in_dw = sctx->gs_shader->so.stride;
+ si_update_so(sctx, sctx->gs_shader);
if (!sctx->gs_rings)
si_init_gs_rings(sctx);
+
if (sctx->emitted.named.gs_rings != sctx->gs_rings)
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
- si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
- sctx->gsvs_ring,
- sctx->gs_shader->gs_max_out_vertices *
- sctx->gs_shader->info.num_outputs * 16,
- 64, true, true, 4, 16);
+ si_update_gs_rings(sctx);
} else {
si_pm4_bind_state(sctx, gs_rings, NULL);
si_pm4_bind_state(sctx, gs, NULL);
if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) {
sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing;
- sctx->msaa_config.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
if (sctx->b.chip_class == SI)
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
}