si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
/* We need at least 2 components for LS.
- * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
- vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
+ * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
+ * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
+ */
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40);
/* We need at least 2 components for LS.
- * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
- ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
-
- if (shader->config.scratch_bytes_per_wave) {
- fprintf(stderr, "HS: scratch buffer unsupported");
- abort();
- }
+ * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
+ * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
+ */
+ ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
shader->config.rsrc2 =
S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) |
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0;
num_user_sgprs = SI_VS_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
- vgpr_comp_cnt = 3; /* all components are needed for TES */
+ vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2;
num_user_sgprs = SI_TES_NUM_USER_SGPR;
} else
unreachable("invalid shader selector type");
struct gfx9_gs_info gs_info;
if (es_type == PIPE_SHADER_VERTEX)
- es_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
+ es_vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0;
else if (es_type == PIPE_SHADER_TESS_EVAL)
- es_vgpr_comp_cnt = 3; /* all components are needed for TES */
+ es_vgpr_comp_cnt = shader->key.part.gs.es->info.uses_primid ? 3 : 2;
else
unreachable("invalid shader selector type");
polaris_set_vgt_vertex_reuse(sscreen, shader->key.part.gs.es,
NULL, pm4);
-
- if (shader->config.scratch_bytes_per_wave) {
- fprintf(stderr, "GS: scratch buffer unsupported");
- abort();
- }
} else {
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
unsigned oc_lds_en;
unsigned window_space =
shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
- bool enable_prim_id = si_vs_exports_prim_id(shader);
+ bool enable_prim_id = shader->key.mono.vs_export_prim_id || shader->selector->info.uses_primid;
pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
* not sent again.
*/
if (!gs) {
- si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
- S_028A40_MODE(enable_prim_id ? V_028A40_GS_SCENARIO_A : 0));
+ unsigned mode = 0;
+
+ /* PrimID needs GS scenario A.
+ * GFX9 also needs it when ViewportIndex is enabled.
+ */
+ if (enable_prim_id ||
+ (sscreen->b.chip_class >= GFX9 &&
+ shader->selector->info.writes_viewport_index))
+ mode = V_028A40_GS_SCENARIO_A;
+
+ si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, S_028A40_MODE(mode));
si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, enable_prim_id);
} else {
si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(gs));
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID)
+ * If PrimID is disabled. InstanceID / StepRate1 is loaded instead.
+ * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
+ */
+ vgpr_comp_cnt = enable_prim_id ? 2 : (shader->info.uses_instanceid ? 1 : 0);
num_user_sgprs = SI_VS_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
- vgpr_comp_cnt = 3; /* all components are needed for TES */
+ vgpr_comp_cnt = enable_prim_id ? 3 : 2;
num_user_sgprs = SI_TES_NUM_USER_SGPR;
} else
unreachable("invalid shader selector type");
/* Find out which VS outputs aren't used by the PS. */
uint64_t outputs_written = vs->outputs_written;
- uint32_t outputs_written2 = vs->outputs_written2;
uint64_t inputs_read = 0;
- uint32_t inputs_read2 = 0;
outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
if (!ps_disabled) {
inputs_read = ps->inputs_read;
- inputs_read2 = ps->inputs_read2;
}
uint64_t linked = outputs_written & inputs_read;
- uint32_t linked2 = outputs_written2 & inputs_read2;
key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
- key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
}
/* Compute the key for the hw shader variant */
si_shader_selector_key_hw_vs(sctx, sel, key);
if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
- key->part.vs.epilog.export_prim_id = 1;
+ key->mono.vs_export_prim_id = 1;
}
break;
case PIPE_SHADER_TESS_CTRL:
si_shader_selector_key_hw_vs(sctx, sel, key);
if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
- key->part.tes.epilog.export_prim_id = 1;
+ key->mono.vs_export_prim_id = 1;
}
break;
case PIPE_SHADER_GEOMETRY:
return true;
}
+static void si_destroy_shader_selector(struct si_context *sctx,
+ struct si_shader_selector *sel);
+
+static void si_shader_selector_reference(struct si_context *sctx,
+ struct si_shader_selector **dst,
+ struct si_shader_selector *src)
+{
+ if (pipe_reference(&(*dst)->reference, &src->reference))
+ si_destroy_shader_selector(sctx, *dst);
+
+ *dst = src;
+}
+
/* Select the hw shader variant depending on the current state. */
static int si_shader_select_with_key(struct si_screen *sscreen,
struct si_shader_ctx_state *state,
int thread_index)
{
struct si_shader_selector *sel = state->cso;
+ struct si_shader_selector *previous_stage_sel = NULL;
struct si_shader *current = state->current;
struct si_shader *iter, *shader = NULL;
shader->key = *key;
shader->compiler_ctx_state = *compiler_state;
+ /* If this is a merged shader, get the first shader's selector. */
+ if (sscreen->b.chip_class >= GFX9) {
+ if (sel->type == PIPE_SHADER_TESS_CTRL)
+ previous_stage_sel = key->part.tcs.ls;
+ else if (sel->type == PIPE_SHADER_GEOMETRY)
+ previous_stage_sel = key->part.gs.es;
+ }
+
/* Compile the main shader part if it doesn't exist. This can happen
* if the initial guess was wrong. */
bool is_pure_monolithic =
* For merged shaders, check that the starting shader's main
* part is present.
*/
- if (sscreen->b.chip_class >= GFX9 &&
- (sel->type == PIPE_SHADER_TESS_CTRL ||
- sel->type == PIPE_SHADER_GEOMETRY)) {
- struct si_shader_selector *shader1 = NULL;
+ if (previous_stage_sel) {
struct si_shader_key shader1_key = zeroed;
- if (sel->type == PIPE_SHADER_TESS_CTRL) {
- shader1 = key->part.tcs.ls;
+ if (sel->type == PIPE_SHADER_TESS_CTRL)
shader1_key.as_ls = 1;
- } else if (sel->type == PIPE_SHADER_GEOMETRY) {
- shader1 = key->part.gs.es;
+ else if (sel->type == PIPE_SHADER_GEOMETRY)
shader1_key.as_es = 1;
- } else
+ else
assert(0);
- ok = si_check_missing_main_part(sscreen, shader1,
+ ok = si_check_missing_main_part(sscreen,
+ previous_stage_sel,
compiler_state, &shader1_key);
} else {
ok = si_check_missing_main_part(sscreen, sel,
}
}
+ /* Keep the reference to the 1st shader of merged shaders, so that
+ * Gallium can't destroy it before we destroy the 2nd shader.
+ *
+ * Set sctx = NULL, because it's unused if we're not releasing
+ * the shader, and we don't have any sctx here.
+ */
+ si_shader_selector_reference(NULL, &shader->previous_stage_sel,
+ previous_stage_sel);
+
/* Monolithic-only shaders don't make a distinction between optimized
* and unoptimized. */
shader->is_monolithic =
switch (name) {
case TGSI_SEMANTIC_GENERIC:
/* don't process indices the function can't handle */
- if (index >= 60)
+ if (index >= SI_MAX_IO_GENERIC)
break;
/* fall through */
- case TGSI_SEMANTIC_CLIPDIST:
+ default:
id = si_shader_io_get_unique_index(name, index);
sel->outputs_written &= ~(1ull << id);
break;
case TGSI_SEMANTIC_CLIPVERTEX:
case TGSI_SEMANTIC_EDGEFLAG:
break;
- default:
- id = si_shader_io_get_unique_index2(name, index);
- sel->outputs_written2 &= ~(1u << id);
}
}
}
if (!sel)
return NULL;
+ pipe_reference_init(&sel->reference, 1);
sel->screen = sscreen;
sel->compiler_ctx_state.tm = sctx->tm;
sel->compiler_ctx_state.debug = sctx->b.debug;
case PIPE_SHADER_TESS_CTRL:
/* Always reserve space for these. */
sel->patch_outputs_written |=
- (1llu << si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0)) |
- (1llu << si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0));
+ (1llu << si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0)) |
+ (1llu << si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0));
/* fall through */
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_TESS_EVAL:
case TGSI_SEMANTIC_TESSOUTER:
case TGSI_SEMANTIC_PATCH:
sel->patch_outputs_written |=
- 1llu << si_shader_io_get_unique_index(name, index);
+ 1llu << si_shader_io_get_unique_index_patch(name, index);
break;
case TGSI_SEMANTIC_GENERIC:
/* don't process indices the function can't handle */
- if (index >= 60)
+ if (index >= SI_MAX_IO_GENERIC)
break;
/* fall through */
- case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_PSIZE:
- case TGSI_SEMANTIC_CLIPDIST:
+ default:
sel->outputs_written |=
1llu << si_shader_io_get_unique_index(name, index);
break;
case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
case TGSI_SEMANTIC_EDGEFLAG:
break;
- default:
- sel->outputs_written2 |=
- 1u << si_shader_io_get_unique_index2(name, index);
}
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
unsigned index = sel->info.input_semantic_index[i];
switch (name) {
- case TGSI_SEMANTIC_CLIPDIST:
case TGSI_SEMANTIC_GENERIC:
+ /* don't process indices the function can't handle */
+ if (index >= SI_MAX_IO_GENERIC)
+ break;
+ /* fall through */
+ default:
sel->inputs_read |=
1llu << si_shader_io_get_unique_index(name, index);
break;
case TGSI_SEMANTIC_PCOORD: /* ignore this */
break;
- default:
- sel->inputs_read2 |=
- 1u << si_shader_io_get_unique_index2(name, index);
}
}
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
}
+static void si_update_tess_uses_prim_id(struct si_context *sctx)
+{
+ sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id =
+ (sctx->tes_shader.cso &&
+ sctx->tes_shader.cso->info.uses_primid) ||
+ (sctx->tcs_shader.cso &&
+ sctx->tcs_shader.cso->info.uses_primid) ||
+ (sctx->gs_shader.cso &&
+ sctx->gs_shader.cso->info.uses_primid) ||
+ (sctx->ps_shader.cso && !sctx->gs_shader.cso &&
+ sctx->ps_shader.cso->info.uses_primid);
+}
+
static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
- if (enable_changed)
+ if (enable_changed) {
si_shader_change_notify(sctx);
+ if (sctx->ia_multi_vgt_param_key.u.uses_tess)
+ si_update_tess_uses_prim_id(sctx);
+ }
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
}
-static void si_update_tcs_tes_uses_prim_id(struct si_context *sctx)
-{
- sctx->ia_multi_vgt_param_key.u.tcs_tes_uses_prim_id =
- (sctx->tes_shader.cso &&
- sctx->tes_shader.cso->info.uses_primid) ||
- (sctx->tcs_shader.cso &&
- sctx->tcs_shader.cso->info.uses_primid);
-}
-
static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
sctx->tcs_shader.cso = sel;
sctx->tcs_shader.current = sel ? sel->first_variant : NULL;
- si_update_tcs_tes_uses_prim_id(sctx);
+ si_update_tess_uses_prim_id(sctx);
sctx->do_update_shaders = true;
if (enable_changed)
sctx->tes_shader.cso = sel;
sctx->tes_shader.current = sel ? sel->first_variant : NULL;
sctx->ia_multi_vgt_param_key.u.uses_tess = sel != NULL;
- si_update_tcs_tes_uses_prim_id(sctx);
+ si_update_tess_uses_prim_id(sctx);
sctx->do_update_shaders = true;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
sctx->ps_shader.cso = sel;
sctx->ps_shader.current = sel ? sel->first_variant : NULL;
sctx->do_update_shaders = true;
+ if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
+ si_update_tess_uses_prim_id(sctx);
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
}
}
}
+ si_shader_selector_reference(sctx, &shader->previous_stage_sel, NULL);
si_shader_destroy(shader);
free(shader);
}
-static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
+static void si_destroy_shader_selector(struct si_context *sctx,
+ struct si_shader_selector *sel)
{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_shader_selector *sel = (struct si_shader_selector *)state;
struct si_shader *p = sel->first_variant, *c;
struct si_shader_ctx_state *current_shader[SI_NUM_SHADERS] = {
[PIPE_SHADER_VERTEX] = &sctx->vs_shader,
free(sel);
}
+static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_shader_selector *sel = (struct si_shader_selector *)state;
+
+ si_shader_selector_reference(sctx, &sel, NULL);
+}
+
static unsigned si_get_ps_input_cntl(struct si_context *sctx,
struct si_shader *vs, unsigned name,
unsigned index, unsigned interpolate)
return true;
}
+static void si_shader_lock(struct si_shader *shader)
+{
+ mtx_lock(&shader->selector->mutex);
+ if (shader->previous_stage_sel) {
+ assert(shader->previous_stage_sel != shader->selector);
+ mtx_lock(&shader->previous_stage_sel->mutex);
+ }
+}
+
+static void si_shader_unlock(struct si_shader *shader)
+{
+ if (shader->previous_stage_sel)
+ mtx_unlock(&shader->previous_stage_sel->mutex);
+ mtx_unlock(&shader->selector->mutex);
+}
+
/**
* @returns 1 if \p sel has been updated to use a new scratch buffer
* 0 if not
if (shader->config.scratch_bytes_per_wave == 0)
return 0;
+ /* Prevent race conditions when updating:
+ * - si_shader::scratch_bo
+ * - si_shader::binary::code
+ * - si_shader::previous_stage::binary::code.
+ */
+ si_shader_lock(shader);
+
/* This shader is already configured to use the current
* scratch buffer. */
- if (shader->scratch_bo == sctx->scratch_buffer)
+ if (shader->scratch_bo == sctx->scratch_buffer) {
+ si_shader_unlock(shader);
return 0;
+ }
assert(sctx->scratch_buffer);
- si_shader_apply_scratch_relocs(sctx, shader, &shader->config, scratch_va);
+ if (shader->previous_stage)
+ si_shader_apply_scratch_relocs(shader->previous_stage, scratch_va);
+
+ si_shader_apply_scratch_relocs(shader, scratch_va);
/* Replace the shader bo with a new bo that has the relocs applied. */
r = si_shader_binary_upload(sctx->screen, shader);
- if (r)
+ if (r) {
+ si_shader_unlock(shader);
return r;
+ }
/* Update the shader state to use the new shader bo. */
si_shader_init_pm4_state(sctx->screen, shader);
r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
+ si_shader_unlock(shader);
return 1;
}
return bytes;
}
+static bool si_update_scratch_relocs(struct si_context *sctx)
+{
+ int r;
+
+ /* Update the shaders, so that they are using the latest scratch.
+ * The scratch buffer may have been changed since these shaders were
+ * last used, so we still need to try to update them, even if they
+ * require scratch buffers smaller than the current size.
+ */
+ r = si_update_scratch_buffer(sctx, sctx->ps_shader.current);
+ if (r < 0)
+ return false;
+ if (r == 1)
+ si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
+
+ r = si_update_scratch_buffer(sctx, sctx->gs_shader.current);
+ if (r < 0)
+ return false;
+ if (r == 1)
+ si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
+
+ r = si_update_scratch_buffer(sctx, sctx->tcs_shader.current);
+ if (r < 0)
+ return false;
+ if (r == 1)
+ si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
+
+ /* VS can be bound as LS, ES, or VS. */
+ r = si_update_scratch_buffer(sctx, sctx->vs_shader.current);
+ if (r < 0)
+ return false;
+ if (r == 1) {
+ if (sctx->tes_shader.current)
+ si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
+ else if (sctx->gs_shader.current)
+ si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
+ else
+ si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
+ }
+
+ /* TES can be bound as ES or VS. */
+ r = si_update_scratch_buffer(sctx, sctx->tes_shader.current);
+ if (r < 0)
+ return false;
+ if (r == 1) {
+ if (sctx->gs_shader.current)
+ si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
+ else
+ si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
+ }
+
+ return true;
+}
+
static bool si_update_spi_tmpring_size(struct si_context *sctx)
{
unsigned current_scratch_buffer_size =
unsigned scratch_needed_size = scratch_bytes_per_wave *
sctx->scratch_waves;
unsigned spi_tmpring_size;
- int r;
if (scratch_needed_size > 0) {
if (scratch_needed_size > current_scratch_buffer_size) {
&sctx->scratch_buffer->b.b);
}
- /* Update the shaders, so they are using the latest scratch. The
- * scratch buffer may have been changed since these shaders were
- * last used, so we still need to try to update them, even if
- * they require scratch buffers smaller than the current size.
- */
- r = si_update_scratch_buffer(sctx, sctx->ps_shader.current);
- if (r < 0)
+ if (!si_update_scratch_relocs(sctx))
return false;
- if (r == 1)
- si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
-
- r = si_update_scratch_buffer(sctx, sctx->gs_shader.current);
- if (r < 0)
- return false;
- if (r == 1)
- si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
-
- r = si_update_scratch_buffer(sctx, sctx->tcs_shader.current);
- if (r < 0)
- return false;
- if (r == 1)
- si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
-
- /* VS can be bound as LS, ES, or VS. */
- r = si_update_scratch_buffer(sctx, sctx->vs_shader.current);
- if (r < 0)
- return false;
- if (r == 1) {
- if (sctx->tes_shader.current)
- si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
- else if (sctx->gs_shader.current)
- si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
- else
- si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
- }
-
- /* TES can be bound as ES or VS. */
- r = si_update_scratch_buffer(sctx, sctx->tes_shader.current);
- if (r < 0)
- return false;
- if (r == 1) {
- if (sctx->gs_shader.current)
- si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
- else
- si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
- }
}
/* The LLVM shader backend should be reporting aligned scratch_sizes. */
}
assert(!sctx->tf_ring);
+ /* Use 64K alignment for both rings, so that we can pass the address
+ * to shaders as one SGPR containing bits [16:47].
+ */
sctx->tf_ring = r600_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
32768 * sctx->screen->b.info.max_se,
- 256);
+ 64 * 1024);
if (!sctx->tf_ring)
return;
PIPE_USAGE_DEFAULT,
max_offchip_buffers *
sctx->screen->tess_offchip_block_dw_size * 4,
- 256);
+ 64 * 1024);
if (!sctx->tess_offchip_ring)
return;
si_init_config_add_vgt_flush(sctx);
+ uint64_t offchip_va = r600_resource(sctx->tess_offchip_ring)->gpu_address;
+ uint64_t factor_va = r600_resource(sctx->tf_ring)->gpu_address;
+ assert((offchip_va & 0xffff) == 0);
+ assert((factor_va & 0xffff) == 0);
+
+ si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tess_offchip_ring),
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS);
+ si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tf_ring),
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS);
+
/* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
if (sctx->b.chip_class >= VI)
si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
S_030938_SIZE(sctx->tf_ring->width0 / 4));
si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
- r600_resource(sctx->tf_ring)->gpu_address >> 8);
+ factor_va >> 8);
if (sctx->b.chip_class >= GFX9)
si_pm4_set_reg(sctx->init_config, R_030944_VGT_TF_MEMORY_BASE_HI,
- r600_resource(sctx->tf_ring)->gpu_address >> 40);
+ factor_va >> 40);
si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM,
S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
S_03093C_OFFCHIP_GRANULARITY(offchip_granularity));
si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
S_008988_SIZE(sctx->tf_ring->width0 / 4));
si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE,
- r600_resource(sctx->tf_ring)->gpu_address >> 8);
+ factor_va >> 8);
si_pm4_set_reg(sctx->init_config, R_0089B0_VGT_HS_OFFCHIP_PARAM,
S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers));
}
+ if (sctx->b.chip_class >= GFX9) {
+ si_pm4_set_reg(sctx->init_config,
+ R_00B430_SPI_SHADER_USER_DATA_LS_0 +
+ GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K * 4,
+ offchip_va >> 16);
+ si_pm4_set_reg(sctx->init_config,
+ R_00B430_SPI_SHADER_USER_DATA_LS_0 +
+ GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K * 4,
+ factor_va >> 16);
+ } else {
+ si_pm4_set_reg(sctx->init_config,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0 +
+ GFX6_SGPR_TCS_OFFCHIP_ADDR_BASE64K * 4,
+ offchip_va >> 16);
+ si_pm4_set_reg(sctx->init_config,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0 +
+ GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K * 4,
+ factor_va >> 16);
+ }
+
/* Flush the context to re-emit the init_config state.
* This is done only once in a lifetime of a context.
*/
si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
sctx->b.initial_gfx_cs_size = 0; /* force flush */
si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
-
- si_set_ring_buffer(&sctx->b.b, SI_HS_RING_TESS_FACTOR, sctx->tf_ring,
- 0, sctx->tf_ring->width0, false, false, 0, 0, 0);
-
- si_set_ring_buffer(&sctx->b.b, SI_HS_RING_TESS_OFFCHIP,
- sctx->tess_offchip_ring, 0,
- sctx->tess_offchip_ring->width0, false, false, 0, 0, 0);
}
/**