struct radv_streamout_state *so = &cmd_buffer->state.streamout;
struct radv_shader_info *info;
- if (!pipeline->streamout_shader)
+ if (!pipeline->streamout_shader ||
+ cmd_buffer->device->physical_device->use_ngg_streamout)
return;
info = &pipeline->streamout_shader->info;
(so->enabled_mask << 8) |
(so->enabled_mask << 12);
- if ((old_streamout_enabled != so->streamout_enabled) ||
- (old_hw_enabled_mask != so->hw_enabled_mask))
+ if (!cmd_buffer->device->physical_device->use_ngg_streamout &&
+ ((old_streamout_enabled != so->streamout_enabled) ||
+ (old_hw_enabled_mask != so->hw_enabled_mask)))
radv_emit_streamout_enable(cmd_buffer);
}
device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
+ device->use_ngg_streamout = false;
+
/* Determine the number of threads per wave for all stages. */
device->cs_wave_size = 64;
device->ps_wave_size = 64;
{
int i;
+ if (ctx->options->use_ngg_streamout)
+ return;
+
/* Streamout SGPRs. */
if (ctx->shader_info->so.num_outputs) {
assert(stage == MESA_SHADER_VERTEX ||
sizeof(outinfo->vs_output_param_offset));
outinfo->pos_exports = 0;
- if (ctx->shader_info->so.num_outputs &&
+ if (!ctx->options->use_ngg_streamout &&
+ ctx->shader_info->so.num_outputs &&
!ctx->is_gs_copy_shader) {
/* The GS copy shader emission already emits streamout. */
radv_emit_streamout(ctx, 0);
LLVMValueRef stream_id;
/* Fetch the vertex stream ID. */
- if (ctx->shader_info->so.num_outputs) {
+ if (!ctx->options->use_ngg_streamout &&
+ ctx->shader_info->so.num_outputs) {
stream_id =
ac_unpack_param(&ctx->ac, ctx->streamout_config, 24, 2);
} else {
}
}
- if (ctx->shader_info->so.num_outputs)
+ if (!ctx->options->use_ngg_streamout &&
+ ctx->shader_info->so.num_outputs)
radv_emit_streamout(ctx, stream);
if (stream == 0) {
keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
}
- /* TODO: Implement streamout support for NGG. */
- gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
+ if (!device->physical_device->use_ngg_streamout) {
+ gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
- for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
- if (nir[i])
- last_xfb_stage = i;
- }
+ for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+ if (nir[i])
+ last_xfb_stage = i;
+ }
- if (nir[last_xfb_stage] &&
- radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
- if (nir[MESA_SHADER_TESS_CTRL])
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
- else
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+ if (nir[last_xfb_stage] &&
+ radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
+ if (nir[MESA_SHADER_TESS_CTRL])
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+ else
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+ }
}
}
/* Whether to enable the AMD_shader_ballot extension */
bool use_shader_ballot;
+ /* Whether to enable NGG streamout. */
+ bool use_ngg_streamout;
+
/* Number of threads per wave. */
uint8_t ps_wave_size;
uint8_t cs_wave_size;
config_out->float_mode |= V_00B028_FP_64_DENORMS;
config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
- S_00B12C_SCRATCH_EN(scratch_enabled) |
- S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
- S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
- S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
- S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
- S_00B12C_SO_EN(!!info->so.num_outputs);
+ S_00B12C_SCRATCH_EN(scratch_enabled);
+
+ if (!pdevice->use_ngg_streamout) {
+ config_out->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
+ S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
+ S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
+ S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
+ S_00B12C_SO_EN(!!info->so.num_outputs);
+ }
config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) /
(info->wave_size == 32 ? 8 : 4)) |
options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
options->address32_hi = device->physical_device->rad_info.address32_hi;
options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
+ options->use_ngg_streamout = device->physical_device->use_ngg_streamout;
if ((stage == MESA_SHADER_GEOMETRY && !options->key.vs_common_out.as_ngg) ||
gs_copy_shader)
bool record_llvm_ir;
bool check_ir;
bool has_ls_vgpr_init_bug;
+ bool use_ngg_streamout;
enum radeon_family family;
enum chip_class chip_class;
uint32_t tess_offchip_block_dw_size;