radv/gfx10: add an option to switch from legacy to NGG streamout
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 9 Sep 2019 08:23:30 +0000 (10:23 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 16 Sep 2019 10:08:22 +0000 (12:08 +0200)
This internal option is turned off by default because NGG streamout
still hangs. The hangs seem to be related to GDS, as in RadeonSI.

That option will be turned on once all issues are resolved.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h

index 7baa0b3aa361424d53ce9f60b42a851a61734157..94329a2a500e23795c27181cf8a0a55ef5aa24c2 100644 (file)
@@ -222,7 +222,8 @@ radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer,
        struct radv_streamout_state *so = &cmd_buffer->state.streamout;
        struct radv_shader_info *info;
 
-       if (!pipeline->streamout_shader)
+       if (!pipeline->streamout_shader ||
+           cmd_buffer->device->physical_device->use_ngg_streamout)
                return;
 
        info = &pipeline->streamout_shader->info;
@@ -5810,8 +5811,9 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
                              (so->enabled_mask << 8) |
                              (so->enabled_mask << 12);
 
-       if ((old_streamout_enabled != so->streamout_enabled) ||
-           (old_hw_enabled_mask != so->hw_enabled_mask))
+       if (!cmd_buffer->device->physical_device->use_ngg_streamout &&
+           ((old_streamout_enabled != so->streamout_enabled) ||
+            (old_hw_enabled_mask != so->hw_enabled_mask)))
                radv_emit_streamout_enable(cmd_buffer);
 }
 
index bdc38a555dedd44fc753211db19532c57c9576d9..53a08bcdc5ab6df90bb6dcb720e56de352aa8d6e 100644 (file)
@@ -364,6 +364,8 @@ radv_physical_device_init(struct radv_physical_device *device,
        device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
                                    device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
 
+       device->use_ngg_streamout = false;
+
        /* Determine the number of threads per wave for all stages. */
        device->cs_wave_size = 64;
        device->ps_wave_size = 64;
index 88c0c514eaec4110522ab2dda0da1ca6aafbf690..d9c91f0591b8b08f98f2bd3b8b5f58a68dda16f7 100644 (file)
@@ -771,6 +771,9 @@ declare_streamout_sgprs(struct radv_shader_context *ctx, gl_shader_stage stage,
 {
        int i;
 
+       if (ctx->options->use_ngg_streamout)
+               return;
+
        /* Streamout SGPRs. */
        if (ctx->shader_info->so.num_outputs) {
                assert(stage == MESA_SHADER_VERTEX ||
@@ -2786,7 +2789,8 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
               sizeof(outinfo->vs_output_param_offset));
        outinfo->pos_exports = 0;
 
-       if (ctx->shader_info->so.num_outputs &&
+       if (!ctx->options->use_ngg_streamout &&
+           ctx->shader_info->so.num_outputs &&
            !ctx->is_gs_copy_shader) {
                /* The GS copy shader emission already emits streamout. */
                radv_emit_streamout(ctx, 0);
@@ -4479,7 +4483,8 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
        LLVMValueRef stream_id;
 
        /* Fetch the vertex stream ID. */
-       if (ctx->shader_info->so.num_outputs) {
+       if (!ctx->options->use_ngg_streamout &&
+           ctx->shader_info->so.num_outputs) {
                stream_id =
                        ac_unpack_param(&ctx->ac, ctx->streamout_config, 24, 2);
        } else {
@@ -4550,7 +4555,8 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
                        }
                }
 
-               if (ctx->shader_info->so.num_outputs)
+               if (!ctx->options->use_ngg_streamout &&
+                   ctx->shader_info->so.num_outputs)
                        radv_emit_streamout(ctx, stream);
 
                if (stream == 0) {
index 054f6ac36f85bcddf18b51e768e9b8e7526b4048..48ea2c039295d8588e3ba8aa140f9ad14af6c509 100644 (file)
@@ -2350,20 +2350,21 @@ radv_fill_shader_keys(struct radv_device *device,
                                keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
                }
 
-               /* TODO: Implement streamout support for NGG. */
-               gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
+               if (!device->physical_device->use_ngg_streamout) {
+                       gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
 
-               for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
-                       if (nir[i])
-                               last_xfb_stage = i;
-               }
+                       for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+                               if (nir[i])
+                                       last_xfb_stage = i;
+                       }
 
-               if (nir[last_xfb_stage] &&
-                   radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
-                       if (nir[MESA_SHADER_TESS_CTRL])
-                               keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
-                       else
-                               keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+                       if (nir[last_xfb_stage] &&
+                           radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
+                               if (nir[MESA_SHADER_TESS_CTRL])
+                                       keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+                               else
+                                       keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+                       }
                }
        }
 
index d6c446abd06b529d8c4c15ace342b3f070c2df73..8b612155621ea8b50cc8128c928f8e6d9ffee201 100644 (file)
@@ -288,6 +288,9 @@ struct radv_physical_device {
        /* Whether to enable the AMD_shader_ballot extension */
        bool use_shader_ballot;
 
+       /* Whether to enable NGG streamout. */
+       bool use_ngg_streamout;
+
        /* Number of threads per wave. */
        uint8_t ps_wave_size;
        uint8_t cs_wave_size;
index 473b6b0032f950450165d8fe4f956ada084a2346..c8dd54fae5338dc38f5f2c1a782170473406bd25 100644 (file)
@@ -683,12 +683,15 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
        config_out->float_mode |= V_00B028_FP_64_DENORMS;
 
        config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
-                           S_00B12C_SCRATCH_EN(scratch_enabled) |
-                           S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
-                           S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
-                           S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
-                           S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
-                           S_00B12C_SO_EN(!!info->so.num_outputs);
+                           S_00B12C_SCRATCH_EN(scratch_enabled);
+
+       if (!pdevice->use_ngg_streamout) {
+               config_out->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
+                                    S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
+                                    S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
+                                    S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
+                                    S_00B12C_SO_EN(!!info->so.num_outputs);
+       }
 
        config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) /
                                           (info->wave_size == 32 ? 8 : 4)) |
@@ -1050,6 +1053,7 @@ shader_variant_compile(struct radv_device *device,
        options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
        options->address32_hi = device->physical_device->rad_info.address32_hi;
        options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
+       options->use_ngg_streamout = device->physical_device->use_ngg_streamout;
 
        if ((stage == MESA_SHADER_GEOMETRY && !options->key.vs_common_out.as_ngg) ||
            gs_copy_shader)
index 61431cc9683cc9b445f2aacb11a1a817a40b7569..874318e7dc497c2d6287d549a800d89114528fb5 100644 (file)
@@ -126,6 +126,7 @@ struct radv_nir_compiler_options {
        bool record_llvm_ir;
        bool check_ir;
        bool has_ls_vgpr_init_bug;
+       bool use_ngg_streamout;
        enum radeon_family family;
        enum chip_class chip_class;
        uint32_t tess_offchip_block_dw_size;