#include "compiler/nir/nir_serialize.h"
#include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_parse.h"
#include "util/hash_table.h"
#include "util/crc32.h"
#include "util/u_async_debug.h"
unsigned ir_size;
void *ir_binary;
- if (sel->tokens) {
- ir_binary = sel->tokens;
- ir_size = tgsi_num_tokens(sel->tokens) *
- sizeof(struct tgsi_token);
- } else if (sel->nir_binary) {
+ if (sel->nir_binary) {
ir_binary = sel->nir_binary;
ir_size = sel->nir_size;
} else {
const struct si_shader_selector *tes,
struct si_pm4_state *pm4)
{
- const struct tgsi_shader_info *info = &tes->info;
+ const struct si_shader_info *info = &tes->info;
unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
}
}
-static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
+/* Return the number of user SGPRs for a shader that has a VS stage.
+ *
+ * NOTE(review): "vs" is the previous-stage selector when this is a merged
+ * shader (previous_stage_sel set), otherwise the shader's own selector —
+ * so merged HS/GS count the VS's VBO-in-SGPR usage, not their own.
+ */
+static unsigned si_get_num_vs_user_sgprs(struct si_shader *shader,
+					 unsigned num_always_on_user_sgprs)
 {
+	struct si_shader_selector *vs = shader->previous_stage_sel ?
+			shader->previous_stage_sel : shader->selector;
+	unsigned num_vbos_in_user_sgprs = vs->num_vbos_in_user_sgprs;
+
+	/* 1 SGPR is reserved for the vertex buffer pointer. */
+	assert(num_always_on_user_sgprs <= SI_SGPR_VS_VB_DESCRIPTOR_FIRST - 1);
+
+	/* Each VBO descriptor passed in user SGPRs occupies 4 SGPRs,
+	 * starting at the fixed first-descriptor slot. */
+	if (num_vbos_in_user_sgprs)
+		return SI_SGPR_VS_VB_DESCRIPTOR_FIRST + num_vbos_in_user_sgprs * 4;
+
 	/* Add the pointer to VBO descriptors. */
 	return num_always_on_user_sgprs + 1;
 }
S_00B528_VGPR_COMP_CNT(si_get_vs_vgpr_comp_cnt(sscreen, shader, false)) |
S_00B528_DX10_CLAMP(1) |
S_00B528_FLOAT_MODE(shader->config.float_mode);
- shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR)) |
+ shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(shader, SI_VS_NUM_USER_SGPR)) |
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
}
}
unsigned num_user_sgprs =
- si_get_num_vs_user_sgprs(GFX9_TCS_NUM_USER_SGPR);
+ si_get_num_vs_user_sgprs(shader, GFX9_TCS_NUM_USER_SGPR);
shader->config.rsrc2 =
S_00B42C_USER_SGPR(num_user_sgprs) |
if (shader->selector->type == PIPE_SHADER_VERTEX) {
vgpr_comp_cnt = si_get_vs_vgpr_comp_cnt(sscreen, shader, false);
- num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR);
+ num_user_sgprs = si_get_num_vs_user_sgprs(shader, SI_VS_NUM_USER_SGPR);
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2;
num_user_sgprs = SI_TES_NUM_USER_SGPR;
unsigned num_user_sgprs;
if (es_type == PIPE_SHADER_VERTEX)
- num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_VSGS_NUM_USER_SGPR);
+ num_user_sgprs = si_get_num_vs_user_sgprs(shader, GFX9_VSGS_NUM_USER_SGPR);
else
num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR;
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, rsrc1);
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2);
+ if (sscreen->info.chip_class >= GFX10) {
+ si_pm4_set_reg(pm4, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+ S_00B204_CU_EN(0xffff) |
+ S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0));
+ }
+
shader->ctx_reg.gs.vgt_gs_onchip_cntl =
S_028A44_ES_VERTS_PER_SUBGRP(shader->gs_info.es_verts_per_subgroup) |
S_028A44_GS_PRIMS_PER_SUBGRP(shader->gs_info.gs_prims_per_subgroup) |
}
}
+/* Conditionally write the GE_PC_ALLOC uconfig register (GFX10).
+ *
+ * The write is skipped when the register-tracking cache shows GE_PC_ALLOC
+ * was already saved with the same value. Since GE_PC_ALLOC is a uconfig
+ * (not a context) register, emitting it never causes a context roll —
+ * which is why callers invoke this after the context-roll check.
+ */
+static void gfx10_emit_ge_pc_alloc(struct si_context *sctx, unsigned value)
+{
+	enum si_tracked_reg reg = SI_TRACKED_GE_PC_ALLOC;
+
+	/* Emit only if the register is untracked or its cached value differs. */
+	if (((sctx->tracked_regs.reg_saved >> reg) & 0x1) != 0x1 ||
+	    sctx->tracked_regs.reg_value[reg] != value) {
+		struct radeon_cmdbuf *cs = sctx->gfx_cs;
+
+		if (sctx->family == CHIP_NAVI10 ||
+		    sctx->family == CHIP_NAVI12 ||
+		    sctx->family == CHIP_NAVI14) {
+			/* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */
+			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+			radeon_emit(cs, EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
+		}
+
+		radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC, value);
+
+		/* Update the tracked-register cache so redundant writes are elided. */
+		sctx->tracked_regs.reg_saved |= 0x1ull << reg;
+		sctx->tracked_regs.reg_value[reg] = value;
+	}
+}
+
/* Common tail code for NGG primitive shaders. */
static void gfx10_emit_shader_ngg_tail(struct si_context *sctx,
struct si_shader *shader,
if (initial_cdw != sctx->gfx_cs->current.cdw)
sctx->context_roll = true;
+
+ /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */
+ gfx10_emit_ge_pc_alloc(sctx, shader->ctx_reg.ngg.ge_pc_alloc);
}
static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx)
static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader)
{
const struct si_shader_selector *gs_sel = shader->selector;
- const struct tgsi_shader_info *gs_info = &gs_sel->info;
+ const struct si_shader_info *gs_info = &gs_sel->info;
enum pipe_shader_type gs_type = shader->selector->type;
const struct si_shader_selector *es_sel =
shader->previous_stage_sel ? shader->previous_stage_sel : shader->selector;
- const struct tgsi_shader_info *es_info = &es_sel->info;
+ const struct si_shader_info *es_info = &es_sel->info;
enum pipe_shader_type es_type = es_sel->type;
unsigned num_user_sgprs;
unsigned nparams, es_vgpr_comp_cnt, gs_vgpr_comp_cnt;
num_user_sgprs = SI_SGPR_VS_BLIT_DATA +
es_info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD];
} else {
- num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_VSGS_NUM_USER_SGPR);
+ num_user_sgprs = si_get_num_vs_user_sgprs(shader, GFX9_VSGS_NUM_USER_SGPR);
}
} else {
assert(es_type == PIPE_SHADER_TESS_EVAL);
* pass edge flags for decomposed primitives (such as quads) to the PA
* for the GL_LINE polygon mode to skip rendering lines on inner edges.
*/
- if (gs_info->uses_invocationid || gs_type == PIPE_SHADER_VERTEX)
+ if (gs_info->uses_invocationid ||
+ (gs_type == PIPE_SHADER_VERTEX && !gfx10_is_ngg_passthrough(shader)))
gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID, edge flags. */
- else if (gs_info->uses_primid)
+ else if ((gs_type == PIPE_SHADER_GEOMETRY && gs_info->uses_primid) ||
+ (gs_type == PIPE_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
- else if (input_prim >= PIPE_PRIM_TRIANGLES)
+ else if (input_prim >= PIPE_PRIM_TRIANGLES && !gfx10_is_ngg_passthrough(shader))
gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
else
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
S_00B22C_OC_LDS_EN(es_type == PIPE_SHADER_TESS_EVAL) |
S_00B22C_LDS_SIZE(shader->config.lds_size));
+ /* Determine LATE_ALLOC_GS. */
+ unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh;
+ unsigned late_alloc_wave64; /* The limit is per SH. */
+
+ /* For Wave32, the hw will launch twice the number of late
+ * alloc waves, so 1 == 2x wave32.
+ *
+ * Don't use late alloc for NGG on Navi14 due to a hw bug.
+ */
+ if (sscreen->info.family == CHIP_NAVI14)
+ late_alloc_wave64 = 0;
+ else if (num_cu_per_sh <= 6)
+ late_alloc_wave64 = num_cu_per_sh - 2; /* All CUs enabled */
+ else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL)
+ late_alloc_wave64 = (num_cu_per_sh - 2) * 6;
+ else
+ late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
+
+ si_pm4_set_reg(pm4, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+ S_00B204_CU_EN(0xffff) |
+ S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));
+
nparams = MAX2(shader->info.nr_param_exports, 1);
shader->ctx_reg.ngg.spi_vs_out_config =
S_0286C4_VS_EXPORT_COUNT(nparams - 1) |
shader->ctx_reg.ngg.vgt_primitiveid_en =
S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
- S_028A84_NGG_DISABLE_PROVOK_REUSE(es_enable_prim_id);
+ S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.mono.u.vs_export_prim_id ||
+ gs_sel->info.writes_primid);
if (gs_type == PIPE_SHADER_GEOMETRY) {
shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = es_sel->esgs_itemsize / 4;
S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX);
shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
- shader->ge_cntl =
- S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
- S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
- S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
+ /* Oversubscribe PC. This improves performance when there are too many varyings. */
+ float oversub_pc_factor = 0.25;
- /* Bug workaround for a possible hang with non-tessellation cases.
- * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0
- *
- * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
- */
- if ((sscreen->info.family == CHIP_NAVI10 ||
- sscreen->info.family == CHIP_NAVI12 ||
- sscreen->info.family == CHIP_NAVI14) &&
- (es_type == PIPE_SHADER_VERTEX || gs_type == PIPE_SHADER_VERTEX) && /* = no tess */
- shader->ngg.hw_max_esverts != 256) {
- shader->ge_cntl &= C_03096C_VERT_GRP_SIZE;
-
- if (shader->ngg.hw_max_esverts > 5) {
- shader->ge_cntl |=
- S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts - 5);
+ if (shader->key.opt.ngg_culling) {
+ /* Be more aggressive with NGG culling. */
+ if (shader->info.nr_param_exports > 4)
+ oversub_pc_factor = 1;
+ else if (shader->info.nr_param_exports > 2)
+ oversub_pc_factor = 0.75;
+ else
+ oversub_pc_factor = 0.5;
+ }
+
+ unsigned oversub_pc_lines = sscreen->info.pc_lines * oversub_pc_factor;
+ shader->ctx_reg.ngg.ge_pc_alloc = S_030980_OVERSUB_EN(1) |
+ S_030980_NUM_PC_LINES(oversub_pc_lines - 1);
+
+ if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST) {
+ shader->ge_cntl =
+ S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
+ S_03096C_VERT_GRP_SIZE(shader->ngg.max_gsprims * 3);
+ } else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP) {
+ shader->ge_cntl =
+ S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
+ S_03096C_VERT_GRP_SIZE(shader->ngg.max_gsprims + 2);
+ } else {
+ shader->ge_cntl =
+ S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
+ S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
+ S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
+
+ /* Bug workaround for a possible hang with non-tessellation cases.
+ * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0
+ *
+ * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
+ */
+ if ((sscreen->info.family == CHIP_NAVI10 ||
+ sscreen->info.family == CHIP_NAVI12 ||
+ sscreen->info.family == CHIP_NAVI14) &&
+ (es_type == PIPE_SHADER_VERTEX || gs_type == PIPE_SHADER_VERTEX) && /* = no tess */
+ shader->ngg.hw_max_esverts != 256) {
+ shader->ge_cntl &= C_03096C_VERT_GRP_SIZE;
+
+ if (shader->ngg.hw_max_esverts > 5) {
+ shader->ge_cntl |=
+ S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts - 5);
+ }
}
}
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
shader->vgt_vertex_reuse_block_cntl);
- if (initial_cdw != sctx->gfx_cs->current.cdw)
- sctx->context_roll = true;
-
/* Required programming for tessellation. (legacy pipeline only) */
if (sctx->chip_class == GFX10 &&
shader->selector->type == PIPE_SHADER_TESS_EVAL) {
shader->pa_cl_vs_out_cntl,
SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
}
+
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll = true;
+
+ /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */
+ if (sctx->chip_class >= GFX10)
+ gfx10_emit_ge_pc_alloc(sctx, shader->ctx_reg.vs.ge_pc_alloc);
}
/**
static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
struct si_shader_selector *gs)
{
- const struct tgsi_shader_info *info = &shader->selector->info;
+ const struct si_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
unsigned num_user_sgprs, vgpr_comp_cnt;
uint64_t va;
num_user_sgprs = SI_SGPR_VS_BLIT_DATA +
info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD];
} else {
- num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR);
+ num_user_sgprs = si_get_num_vs_user_sgprs(shader, SI_VS_NUM_USER_SGPR);
}
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = enable_prim_id ? 3 : 2;
S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE);
+ shader->ctx_reg.vs.ge_pc_alloc = S_030980_OVERSUB_EN(1) |
+ S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1);
shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false);
oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0;
S_00B12C_OC_LDS_EN(oc_lds_en) |
S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
+ if (sscreen->info.chip_class >= GFX10)
+ rsrc2 |= S_00B12C_USER_SGPR_MSB_GFX10(num_user_sgprs >> 5);
+ else if (sscreen->info.chip_class == GFX9)
+ rsrc2 |= S_00B12C_USER_SGPR_MSB_GFX9(num_user_sgprs >> 5);
+
if (sscreen->info.chip_class <= GFX9)
rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
static unsigned si_get_ps_num_interp(struct si_shader *ps)
{
- struct tgsi_shader_info *info = &ps->selector->info;
+ struct si_shader_info *info = &ps->selector->info;
unsigned num_colors = !!(info->colors_read & 0x0f) +
!!(info->colors_read & 0xf0);
unsigned num_interp = ps->selector->info.num_inputs +
static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
{
- struct tgsi_shader_info *info = &shader->selector->info;
+ struct si_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t linked = outputs_written & inputs_read;
key->opt.kill_outputs = ~linked & outputs_written;
+ key->opt.ngg_culling = sctx->ngg_culling;
}
/* Compute the key for the hw shader variant */
key->part.tcs.epilog.prim_mode =
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
key->part.tcs.epilog.invoc0_tess_factors_are_def =
- sel->tcs_info.tessfactors_are_def_in_all_invocs;
+ sel->info.tessfactors_are_def_in_all_invocs;
key->part.tcs.epilog.tes_reads_tess_factors =
sctx->tes_shader.cso->info.reads_tess_factors;
if (!compiler->passes)
si_init_compiler(sscreen, compiler);
- if (unlikely(!si_shader_create(sscreen, compiler, shader, debug))) {
+ if (unlikely(!si_create_shader_variant(sscreen, compiler, shader, debug))) {
PRINT_ERR("Failed to build shader variant (type=%u)\n",
sel->type);
shader->compilation_failed = true;
main_part->key.as_ngg = key->as_ngg;
main_part->is_monolithic = false;
- if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
- main_part, &compiler_state->debug) != 0) {
+ if (!si_compile_shader(sscreen, compiler_state->compiler,
+ main_part, &compiler_state->debug)) {
FREE(main_part);
return false;
}
&key, -1, false);
}
-static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
+static void si_parse_next_shader_property(const struct si_shader_info *info,
bool streamout,
struct si_shader_key *key)
{
sel->type == PIPE_SHADER_GEOMETRY))
shader->key.as_ngg = 1;
- if (sel->tokens || sel->nir) {
+ if (sel->nir) {
si_get_ir_cache_key(sel, shader->key.as_ngg,
shader->key.as_es, ir_sha1_cache_key);
}
simple_mtx_unlock(&sscreen->shader_cache_mutex);
/* Compile the shader if it hasn't been loaded from the cache. */
- if (si_compile_tgsi_shader(sscreen, compiler, shader,
- debug) != 0) {
+ if (!si_compile_shader(sscreen, compiler, shader, debug)) {
FREE(shader);
fprintf(stderr, "radeonsi: can't compile a main shader part\n");
return;
}
/* Return descriptor slot usage masks from the given shader info. */
-void si_get_active_slot_masks(const struct tgsi_shader_info *info,
+void si_get_active_slot_masks(const struct si_shader_info *info,
uint32_t *const_and_shader_buffers,
uint64_t *samplers_and_images)
{
sel->so = state->stream_output;
- if (state->type == PIPE_SHADER_IR_TGSI &&
- !sscreen->options.enable_nir) {
- sel->tokens = tgsi_dup_tokens(state->tokens);
- if (!sel->tokens) {
- FREE(sel);
- return NULL;
- }
-
- tgsi_scan_shader(state->tokens, &sel->info);
- tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info);
-
- /* Fixup for TGSI: Set which opcode uses which (i,j) pair. */
- if (sel->info.uses_persp_opcode_interp_centroid)
- sel->info.uses_persp_centroid = true;
-
- if (sel->info.uses_linear_opcode_interp_centroid)
- sel->info.uses_linear_centroid = true;
-
- if (sel->info.uses_persp_opcode_interp_offset ||
- sel->info.uses_persp_opcode_interp_sample)
- sel->info.uses_persp_center = true;
-
- if (sel->info.uses_linear_opcode_interp_offset ||
- sel->info.uses_linear_opcode_interp_sample)
- sel->info.uses_linear_center = true;
+ if (state->type == PIPE_SHADER_IR_TGSI) {
+ sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
} else {
- if (state->type == PIPE_SHADER_IR_TGSI) {
- sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
- } else {
- assert(state->type == PIPE_SHADER_IR_NIR);
- sel->nir = state->ir.nir;
- }
-
- si_nir_scan_shader(sel->nir, &sel->info);
- si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
- si_nir_adjust_driver_locations(sel->nir);
+ assert(state->type == PIPE_SHADER_IR_NIR);
+ sel->nir = state->ir.nir;
}
+ si_nir_scan_shader(sel->nir, &sel->info);
+ si_nir_adjust_driver_locations(sel->nir);
+
sel->type = sel->info.processor;
p_atomic_inc(&sscreen->num_shaders_created);
si_get_active_slot_masks(&sel->info,
(sel->so.output[i].stream * 4);
}
+ sel->num_vs_inputs = sel->type == PIPE_SHADER_VERTEX &&
+ !sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD] ?
+ sel->info.num_inputs : 0;
+ sel->num_vbos_in_user_sgprs =
+ MIN2(sel->num_vs_inputs, sscreen->num_vbos_in_user_sgprs);
+
/* The prolog is a no-op if there are no inputs. */
sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX &&
sel->info.num_inputs &&
default:;
}
+ sel->ngg_culling_allowed =
+ sscreen->info.chip_class == GFX10 &&
+ sscreen->info.has_dedicated_vram &&
+ sscreen->use_ngg_culling &&
+ /* Disallow TES by default, because TessMark results are mixed. */
+ (sel->type == PIPE_SHADER_VERTEX ||
+ (sscreen->always_use_ngg_culling && sel->type == PIPE_SHADER_TESS_EVAL)) &&
+ sel->info.writes_position &&
+ !sel->info.writes_viewport_index && /* cull only against viewport 0 */
+ !sel->info.writes_memory &&
+ !sel->so.num_outputs &&
+ !sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD] &&
+ !sel->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+
/* PA_CL_VS_OUT_CNTL */
if (sctx->chip_class <= GFX9)
sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false);
sctx->flags |= SI_CONTEXT_VGT_FLUSH;
sctx->ngg = new_ngg;
- sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+ sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
return true;
}
return false;
sctx->ia_multi_vgt_param_key.u.uses_gs = sel != NULL;
si_update_common_shader_state(sctx);
- sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+ sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
ngg_changed = si_update_ngg(sctx);
if (ngg_changed || enable_changed)
si_update_tess_uses_prim_id(sctx);
si_update_common_shader_state(sctx);
- sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+ sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
bool ngg_changed = si_update_ngg(sctx);
if (ngg_changed || enable_changed)
util_queue_fence_destroy(&sel->ready);
simple_mtx_destroy(&sel->mutex);
- free(sel->tokens);
ralloc_free(sel->nir);
free(sel->nir_binary);
free(sel);
struct si_shader *vs, unsigned name,
unsigned index, unsigned interpolate)
{
- struct tgsi_shader_info *vsinfo = &vs->selector->info;
+ struct si_shader_info *vsinfo = &vs->selector->info;
unsigned j, offset, ps_input_cntl = 0;
if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
{
struct si_shader *ps = sctx->ps_shader.current;
struct si_shader *vs = si_get_vs_state(sctx);
- struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL;
+ struct si_shader_info *psinfo = ps ? &ps->selector->info : NULL;
unsigned i, num_interp, num_written = 0, bcol_interp[2];
unsigned spi_ps_input_cntl[32];
}
if (key.u.ngg) {
- stages |= S_028B54_PRIMGEN_EN(1);
- if (key.u.streamout)
- stages |= S_028B54_NGG_WAVE_ID_EN(1);
+ stages |= S_028B54_PRIMGEN_EN(1) |
+ S_028B54_GS_FAST_LAUNCH(key.u.ngg_gs_fast_launch) |
+ S_028B54_NGG_WAVE_ID_EN(key.u.streamout) |
+ S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough);
} else if (key.u.gs)
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
}
}
+ /* This must be done after the shader variant is selected. */
+ if (sctx->ngg) {
+ struct si_shader *vs = si_get_vs(sctx)->current;
+
+ key.u.ngg_passthrough = gfx10_is_ngg_passthrough(vs);
+ key.u.ngg_gs_fast_launch = !!(vs->key.opt.ngg_culling &
+ SI_NGG_CULL_GS_FAST_LAUNCH_ALL);
+ }
+
si_update_vgt_shader_config(sctx, key);
if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)