This internal option is turned off by default because NGG streamout
still hangs. The hang seems to be related to GDS, as with RadeonSI.
The option will be turned on once all issues are resolved.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
struct radv_shader_info *info;
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
struct radv_shader_info *info;
- if (!pipeline->streamout_shader)
+ if (!pipeline->streamout_shader ||
+ cmd_buffer->device->physical_device->use_ngg_streamout)
return;
info = &pipeline->streamout_shader->info;
return;
info = &pipeline->streamout_shader->info;
(so->enabled_mask << 8) |
(so->enabled_mask << 12);
(so->enabled_mask << 8) |
(so->enabled_mask << 12);
- if ((old_streamout_enabled != so->streamout_enabled) ||
- (old_hw_enabled_mask != so->hw_enabled_mask))
+ if (!cmd_buffer->device->physical_device->use_ngg_streamout &&
+ ((old_streamout_enabled != so->streamout_enabled) ||
+ (old_hw_enabled_mask != so->hw_enabled_mask)))
radv_emit_streamout_enable(cmd_buffer);
}
radv_emit_streamout_enable(cmd_buffer);
}
device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
+ device->use_ngg_streamout = false;
+
/* Determine the number of threads per wave for all stages. */
device->cs_wave_size = 64;
device->ps_wave_size = 64;
/* Determine the number of threads per wave for all stages. */
device->cs_wave_size = 64;
device->ps_wave_size = 64;
+ if (ctx->options->use_ngg_streamout)
+ return;
+
/* Streamout SGPRs. */
if (ctx->shader_info->so.num_outputs) {
assert(stage == MESA_SHADER_VERTEX ||
/* Streamout SGPRs. */
if (ctx->shader_info->so.num_outputs) {
assert(stage == MESA_SHADER_VERTEX ||
sizeof(outinfo->vs_output_param_offset));
outinfo->pos_exports = 0;
sizeof(outinfo->vs_output_param_offset));
outinfo->pos_exports = 0;
- if (ctx->shader_info->so.num_outputs &&
+ if (!ctx->options->use_ngg_streamout &&
+ ctx->shader_info->so.num_outputs &&
!ctx->is_gs_copy_shader) {
/* The GS copy shader emission already emits streamout. */
radv_emit_streamout(ctx, 0);
!ctx->is_gs_copy_shader) {
/* The GS copy shader emission already emits streamout. */
radv_emit_streamout(ctx, 0);
LLVMValueRef stream_id;
/* Fetch the vertex stream ID. */
LLVMValueRef stream_id;
/* Fetch the vertex stream ID. */
- if (ctx->shader_info->so.num_outputs) {
+ if (!ctx->options->use_ngg_streamout &&
+ ctx->shader_info->so.num_outputs) {
stream_id =
ac_unpack_param(&ctx->ac, ctx->streamout_config, 24, 2);
} else {
stream_id =
ac_unpack_param(&ctx->ac, ctx->streamout_config, 24, 2);
} else {
- if (ctx->shader_info->so.num_outputs)
+ if (!ctx->options->use_ngg_streamout &&
+ ctx->shader_info->so.num_outputs)
radv_emit_streamout(ctx, stream);
if (stream == 0) {
radv_emit_streamout(ctx, stream);
if (stream == 0) {
keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
}
keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
}
- /* TODO: Implement streamout support for NGG. */
- gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
+ if (!device->physical_device->use_ngg_streamout) {
+ gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
- for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
- if (nir[i])
- last_xfb_stage = i;
- }
+ for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+ if (nir[i])
+ last_xfb_stage = i;
+ }
- if (nir[last_xfb_stage] &&
- radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
- if (nir[MESA_SHADER_TESS_CTRL])
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
- else
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+ if (nir[last_xfb_stage] &&
+ radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
+ if (nir[MESA_SHADER_TESS_CTRL])
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+ else
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+ }
/* Whether to enable the AMD_shader_ballot extension */
bool use_shader_ballot;
/* Whether to enable the AMD_shader_ballot extension */
bool use_shader_ballot;
+ /* Whether to enable NGG streamout. */
+ bool use_ngg_streamout;
+
/* Number of threads per wave. */
uint8_t ps_wave_size;
uint8_t cs_wave_size;
/* Number of threads per wave. */
uint8_t ps_wave_size;
uint8_t cs_wave_size;
config_out->float_mode |= V_00B028_FP_64_DENORMS;
config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
config_out->float_mode |= V_00B028_FP_64_DENORMS;
config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
- S_00B12C_SCRATCH_EN(scratch_enabled) |
- S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
- S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
- S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
- S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
- S_00B12C_SO_EN(!!info->so.num_outputs);
+ S_00B12C_SCRATCH_EN(scratch_enabled);
+
+ if (!pdevice->use_ngg_streamout) {
+ config_out->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
+ S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
+ S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
+ S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
+ S_00B12C_SO_EN(!!info->so.num_outputs);
+ }
config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) /
(info->wave_size == 32 ? 8 : 4)) |
config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) /
(info->wave_size == 32 ? 8 : 4)) |
options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
options->address32_hi = device->physical_device->rad_info.address32_hi;
options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
options->address32_hi = device->physical_device->rad_info.address32_hi;
options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
+ options->use_ngg_streamout = device->physical_device->use_ngg_streamout;
if ((stage == MESA_SHADER_GEOMETRY && !options->key.vs_common_out.as_ngg) ||
gs_copy_shader)
if ((stage == MESA_SHADER_GEOMETRY && !options->key.vs_common_out.as_ngg) ||
gs_copy_shader)
bool record_llvm_ir;
bool check_ir;
bool has_ls_vgpr_init_bug;
bool record_llvm_ir;
bool check_ir;
bool has_ls_vgpr_init_bug;
+ bool use_ngg_streamout;
enum radeon_family family;
enum chip_class chip_class;
uint32_t tess_offchip_block_dw_size;
enum radeon_family family;
enum chip_class chip_class;
uint32_t tess_offchip_block_dw_size;