return variant->info.is_ngg;
}
+bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline)
+{
+ assert(radv_pipeline_has_ngg(pipeline));
+
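+ /* Check the last enabled geometry-processing stage (GS, else TES,
+ * else VS), since that is the stage compiled as the NGG shader.
+ */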
+ struct radv_shader_variant *variant = NULL;
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ variant = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ variant = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ else if (pipeline->shaders[MESA_SHADER_VERTEX])
+ variant = pipeline->shaders[MESA_SHADER_VERTEX];
+ else
+ return false;
+ return variant->info.is_ngg_passthrough;
+}
+
bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline)
{
if (!radv_pipeline_has_gs(pipeline))
keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
}
- if (!device->physical_device->use_ngg_streamout) {
- gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
+ gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
- for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
- if (nir[i])
- last_xfb_stage = i;
- }
+ for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+ if (nir[i])
+ last_xfb_stage = i;
+ }
- if (nir[last_xfb_stage] &&
- radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
- if (nir[MESA_SHADER_TESS_CTRL])
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
- else
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+ bool uses_xfb = nir[last_xfb_stage] &&
+ radv_nir_stage_uses_xfb(nir[last_xfb_stage]);
+
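+ /* NGG streamout isn't supported here, so fall back to the legacy
+ * (non-NGG) path when transform feedback is used.
+ */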
+ if (!device->physical_device->use_ngg_streamout && uses_xfb) {
+ if (nir[MESA_SHADER_TESS_CTRL])
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+ else
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+ }
+
+ /* Determine if the pipeline is eligible for the NGG passthrough
+ * mode. It can't be enabled for geometry shaders, for NGG
+ * streamout, or for vertex shaders that export the primitive ID
+ * (the latter is checked later because the info isn't available
+ * here).
+ */
+ if (!nir[MESA_SHADER_GEOMETRY] && !uses_xfb) {
+ if (nir[MESA_SHADER_TESS_CTRL] &&
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg) {
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg_passthrough = true;
+ } else if (nir[MESA_SHADER_VERTEX] &&
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) {
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = true;
}
}
}
keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_clip_dists =
!!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
+ /* NGG passthrough mode can't be enabled for vertex shaders
+ * that export the primitive ID.
+ *
+ * TODO: I should really refactor the keys logic.
+ */
+ if (nir[MESA_SHADER_VERTEX] &&
+ keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id) {
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = false;
+ }
+
filled_stages |= (1 << MESA_SHADER_FRAGMENT);
}
}
static
-bool radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts)
+bool radv_aco_supported_stage(gl_shader_stage stage, bool has_ts)
{
- return (stage == MESA_SHADER_VERTEX && !has_gs && !has_ts) ||
+ return (stage == MESA_SHADER_VERTEX && !has_ts) ||
+ (stage == MESA_SHADER_GEOMETRY && !has_ts) ||
stage == MESA_SHADER_FRAGMENT ||
stage == MESA_SHADER_COMPUTE;
}
modules[MESA_SHADER_FRAGMENT] = &fs_m;
}
- bool has_gs = modules[MESA_SHADER_GEOMETRY];
bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL];
bool use_aco = device->physical_device->use_aco;
radv_start_feedback(stage_feedbacks[i]);
- bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
+ bool aco = use_aco && radv_aco_supported_stage(i, has_ts);
nir[i] = radv_shader_compile_to_nir(device, modules[i],
stage ? stage->pName : "main", i,
stage ? stage->pSpecializationInfo : NULL,
nir_lower_non_uniform_texture_access |
nir_lower_non_uniform_image_access);
- bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
+ bool aco = use_aco && radv_aco_supported_stage(i, has_ts);
if (!aco)
NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
}
-
- if (radv_can_dump_shader(device, modules[i], false))
- nir_print_shader(nir[i], stderr);
}
if (nir[MESA_SHADER_FRAGMENT])
radv_lower_fs_io(nir[MESA_SHADER_FRAGMENT]);
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (radv_can_dump_shader(device, modules[i], false))
+ nir_print_shader(nir[i], stderr);
+ }
+
radv_fill_shader_keys(device, keys, key, nir);
radv_fill_shader_info(pipeline, pStages, keys, infos, nir);
gfx9_get_gs_info(key, pipeline, nir, infos, gs_info);
}
+ if (modules[MESA_SHADER_GEOMETRY]) {
+ struct radv_shader_binary *gs_copy_binary = NULL;
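+ /* On the legacy (non-NGG) GS path, a separate copy shader reads
+ * the GS output ring and performs the vertex exports.
+ */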
+ if (!pipeline->gs_copy_shader &&
+ !radv_pipeline_has_ngg(pipeline)) {
+ struct radv_shader_info info = {};
+ struct radv_shader_variant_key key = {};
+
+ key.has_multiview_view_index =
+ keys[MESA_SHADER_GEOMETRY].has_multiview_view_index;
+
+ radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY],
+ pipeline->layout, &key,
+ &info);
+ info.wave_size = 64; /* Wave32 not supported. */
+
+ pipeline->gs_copy_shader = radv_create_gs_copy_shader(
+ device, nir[MESA_SHADER_GEOMETRY], &info,
+ &gs_copy_binary, keep_executable_info,
+ keys[MESA_SHADER_GEOMETRY].has_multiview_view_index,
+ use_aco);
+ }
+
+ if (!keep_executable_info && pipeline->gs_copy_shader) {
+ struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL};
+ struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
+
+ binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary;
+ variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
+
+ radv_pipeline_cache_insert_shaders(device, cache,
+ gs_copy_hash,
+ variants,
+ binaries);
+ }
+ free(gs_copy_binary);
+ }
+
if (nir[MESA_SHADER_FRAGMENT]) {
if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
- bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts);
+ bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_ts);
pipeline->shaders[MESA_SHADER_FRAGMENT] =
radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
pipeline->layout, keys + MESA_SHADER_FRAGMENT,
radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]);
+ bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_GEOMETRY, has_ts);
pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
pipeline->layout,
&keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
- false, &binaries[MESA_SHADER_GEOMETRY]);
+ aco, &binaries[MESA_SHADER_GEOMETRY]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
}
radv_start_feedback(stage_feedbacks[i]);
- bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
+ bool aco = use_aco && radv_aco_supported_stage(i, has_ts);
pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
pipeline->layout,
keys + i, infos + i,keep_executable_info,
}
}
- if(modules[MESA_SHADER_GEOMETRY]) {
- struct radv_shader_binary *gs_copy_binary = NULL;
- if (!pipeline->gs_copy_shader &&
- !radv_pipeline_has_ngg(pipeline)) {
- struct radv_shader_info info = {};
- struct radv_shader_variant_key key = {};
-
- key.has_multiview_view_index =
- keys[MESA_SHADER_GEOMETRY].has_multiview_view_index;
-
- radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY],
- pipeline->layout, &key,
- &info);
- info.wave_size = 64; /* Wave32 not supported. */
-
- pipeline->gs_copy_shader = radv_create_gs_copy_shader(
- device, nir[MESA_SHADER_GEOMETRY], &info,
- &gs_copy_binary, keep_executable_info,
- keys[MESA_SHADER_GEOMETRY].has_multiview_view_index);
- }
-
- if (!keep_executable_info && pipeline->gs_copy_shader) {
- struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL};
- struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
-
- binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary;
- variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
-
- radv_pipeline_cache_insert_shaders(device, cache,
- gs_copy_hash,
- variants,
- binaries);
- }
- free(gs_copy_binary);
- }
-
if (!keep_executable_info) {
radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders,
binaries);
radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN,
S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
- S_028A84_NGG_DISABLE_PROVOK_REUSE(es_enable_prim_id));
+ S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id));
radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
ngg_state->vgt_esgs_ring_itemsize);
gs->info.gs.vertices_out);
}
-static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool float16)
+static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade,
+ bool explicit, bool float16)
{
uint32_t ps_input_cntl;
if (offset <= AC_EXP_PARAM_OFFSET_31) {
ps_input_cntl = S_028644_OFFSET(offset);
- if (flat_shade)
+ if (flat_shade || explicit)
ps_input_cntl |= S_028644_FLAT_SHADE(1);
+ if (explicit) {
+ /* Force parameter cache to be read in passthrough
+ * mode.
+ */
+ ps_input_cntl |= S_028644_OFFSET(1 << 5);
+ }
if (float16) {
ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
S_028644_ATTR0_VALID(1);
if (ps->info.ps.prim_id_input) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
++ps_offset;
}
}
ps->info.needs_multiview_view_index) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
if (vs_offset != AC_EXP_PARAM_UNDEFINED)
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
else
- ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false);
++ps_offset;
}
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false);
++ps_offset;
}
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
ps->info.ps.num_input_clips_culls > 4) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false);
++ps_offset;
}
}
for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.ps.input_mask; ++i) {
unsigned vs_offset;
bool flat_shade;
+ bool explicit;
bool float16;
if (!(ps->info.ps.input_mask & (1u << i)))
continue;
}
flat_shade = !!(ps->info.ps.flat_shaded_mask & (1u << ps_offset));
+ explicit = !!(ps->info.ps.explicit_shaded_mask & (1u << ps_offset));
float16 = !!(ps->info.ps.float16_shaded_mask & (1u << ps_offset));
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, explicit, float16);
++ps_offset;
}
stages |= S_028B54_PRIMGEN_EN(1);
if (pipeline->streamout_shader)
stages |= S_028B54_NGG_WAVE_ID_EN(1);
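+ /* Passthrough mode is only enabled when there is no GS, no
+ * transform feedback and no primitive ID export
+ * (see radv_fill_shader_keys).
+ */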
+ if (radv_pipeline_has_ngg_passthrough(pipeline))
+ stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1);
} else if (radv_pipeline_has_gs(pipeline)) {
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
}