radeonsi: remove the always_nir option
[mesa.git] / src / gallium / drivers / radeonsi / si_pipe.c
index 8bd9828f8532933e2d2bbd8290bdaeb4e1a24e30..f19c2a22ebd362322979b2965ab82bf254fd3afd 100644 (file)
@@ -31,7 +31,6 @@
 
 #include "ac_llvm_util.h"
 #include "radeon/radeon_uvd.h"
-#include "gallivm/lp_bld_misc.h"
 #include "util/disk_cache.h"
 #include "util/u_log.h"
 #include "util/u_memory.h"
@@ -63,6 +62,12 @@ static const struct debug_named_value debug_options[] = {
        { "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" },
        { "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." },
        { "gisel", DBG(GISEL), "Enable LLVM global instruction selector." },
+       { "w32ge", DBG(W32_GE), "Use Wave32 for vertex, tessellation, and geometry shaders." },
+       { "w32ps", DBG(W32_PS), "Use Wave32 for pixel shaders." },
+       { "w32cs", DBG(W32_CS), "Use Wave32 for computes shaders." },
+       { "w64ge", DBG(W64_GE), "Use Wave64 for vertex, tessellation, and geometry shaders." },
+       { "w64ps", DBG(W64_PS), "Use Wave64 for pixel shaders." },
+       { "w64cs", DBG(W64_CS), "Use Wave64 for computes shaders." },
 
        /* Shader compiler options (with no effect on the shader cache): */
        { "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" },
@@ -84,6 +89,7 @@ static const struct debug_named_value debug_options[] = {
        { "zerovram", DBG(ZERO_VRAM), "Clear VRAM allocations." },
 
        /* 3D engine options: */
+       { "nogfx", DBG(NO_GFX), "Disable graphics. Only multimedia compute paths can be used." },
        { "alwayspd", DBG(ALWAYS_PD), "Always enable the primitive discard compute shader." },
        { "pd", DBG(PD), "Enable the primitive discard compute shader for large draw calls." },
        { "nopd", DBG(NO_PD), "Disable the primitive discard compute shader." },
@@ -137,6 +143,8 @@ static void si_init_compiler(struct si_screen *sscreen,
        ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options);
        compiler->passes = ac_create_llvm_passes(compiler->tm);
 
+       if (compiler->tm_wave32)
+               compiler->passes_wave32 = ac_create_llvm_passes(compiler->tm_wave32);
        if (compiler->low_opt_tm)
                compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm);
 }
@@ -384,8 +392,14 @@ static void si_set_context_param(struct pipe_context *ctx,
 static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                               unsigned flags)
 {
-       struct si_context *sctx = CALLOC_STRUCT(si_context);
        struct si_screen* sscreen = (struct si_screen *)screen;
+
+       /* Don't create a context if it's not compute-only and hw is compute-only. */
+       if (!sscreen->info.has_graphics &&
+           !(flags & PIPE_CONTEXT_COMPUTE_ONLY))
+               return NULL;
+
+       struct si_context *sctx = CALLOC_STRUCT(si_context);
        struct radeon_winsys *ws = sscreen->ws;
        int shader, i;
        bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
@@ -487,8 +501,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        if (!sctx->border_color_map)
                goto fail;
 
-       if (sctx->chip_class >= GFX10)
-               sctx->ngg = !sscreen->options.disable_ngg;
+       sctx->ngg = sscreen->use_ngg;
 
        /* Initialize context functions used by graphics and compute. */
        if (sctx->chip_class >= GFX10)
@@ -513,10 +526,10 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        si_init_fence_functions(sctx);
        si_init_query_functions(sctx);
        si_init_state_compute_functions(sctx);
+       si_init_context_texture_functions(sctx);
 
        /* Initialize graphics-only context functions. */
        if (sctx->has_graphics) {
-               si_init_context_texture_functions(sctx);
                if (sctx->chip_class >= GFX10)
                        gfx10_init_query(sctx);
                si_init_msaa_functions(sctx);
@@ -530,6 +543,17 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
                        goto fail;
                sctx->blitter->skip_viewport_restore = true;
 
+               /* Some states are expected to be always non-NULL. */
+               sctx->noop_blend = util_blitter_get_noop_blend_state(sctx->blitter);
+               sctx->queued.named.blend = sctx->noop_blend;
+
+               sctx->noop_dsa = util_blitter_get_noop_dsa_state(sctx->blitter);
+               sctx->queued.named.dsa = sctx->noop_dsa;
+
+               sctx->discard_rasterizer_state =
+                       util_blitter_get_discard_rasterizer_state(sctx->blitter);
+               sctx->queued.named.rasterizer = sctx->discard_rasterizer_state;
+
                si_init_draw_functions(sctx);
                si_initialize_prim_discard_tunables(sctx);
        }
@@ -848,9 +872,19 @@ static void si_disk_cache_create(struct si_screen *sscreen)
        #define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |  \
                           DBG(SI_SCHED) |                      \
                           DBG(GISEL) |                         \
-                          DBG(UNSAFE_MATH))
-       uint64_t shader_debug_flags = sscreen->debug_flags &
-               ALL_FLAGS;
+                          DBG(UNSAFE_MATH) |                   \
+                          DBG(W32_GE) |                        \
+                          DBG(W32_PS) |                        \
+                          DBG(W32_CS) |                        \
+                          DBG(W64_GE) |                        \
+                          DBG(W64_PS) |                        \
+                          DBG(W64_CS))
+       uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS;
+
+       if (sscreen->options.enable_nir) {
+               STATIC_ASSERT((ALL_FLAGS & (1u << 31)) == 0);
+               shader_debug_flags |= 1u << 31;
+       }
 
        /* Add the high bits of 32-bit addresses, which affects
         * how 32-bit addresses are expanded to 64 bits.
@@ -859,9 +893,6 @@ static void si_disk_cache_create(struct si_screen *sscreen)
        assert((int16_t)sscreen->info.address32_hi == (int32_t)sscreen->info.address32_hi);
        shader_debug_flags |= (uint64_t)(sscreen->info.address32_hi & 0xffff) << 32;
 
-       if (sscreen->options.enable_nir)
-               shader_debug_flags |= 1ull << 48;
-
        sscreen->disk_shader_cache =
                disk_cache_create(sscreen->info.name,
                                  cache_id,
@@ -884,11 +915,6 @@ static bool si_is_parallel_shader_compilation_finished(struct pipe_screen *scree
                                                       void *shader,
                                                       enum pipe_shader_type shader_type)
 {
-       if (shader_type == PIPE_SHADER_COMPUTE) {
-               struct si_compute *cs = (struct si_compute*)shader;
-
-               return util_queue_fence_is_signalled(&cs->ready);
-       }
        struct si_shader_selector *sel = (struct si_shader_selector *)shader;
 
        return util_queue_fence_is_signalled(&sel->ready);
@@ -928,6 +954,9 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
        sscreen->debug_flags |= debug_get_flags_option("AMD_DEBUG",
                                                       debug_options, 0);
 
+       if (sscreen->debug_flags & DBG(NO_GFX))
+               sscreen->info.has_graphics = false;
+
        /* Set functions first. */
        sscreen->b.context_create = si_pipe_create_context;
        sscreen->b.destroy = si_destroy_screen;
@@ -1079,7 +1108,6 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
         * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
         * SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel. */
        sscreen->has_clear_state = sscreen->info.chip_class >= GFX7 &&
-                                  sscreen->info.chip_class <= GFX9 &&
                                   sscreen->info.is_amdgpu;
 
        sscreen->has_distributed_tess =
@@ -1123,6 +1151,8 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
                                        sscreen->info.family == CHIP_RAVEN;
        sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2 ||
                                           sscreen->info.chip_class >= GFX10;
+       sscreen->use_ngg = sscreen->info.chip_class >= GFX10;
+       sscreen->use_ngg_streamout = sscreen->info.chip_class >= GFX10;
 
        /* Only enable primitive binning on APUs by default. */
        if (sscreen->info.chip_class >= GFX10) {
@@ -1217,9 +1247,35 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
        for (i = 0; i < num_comp_lo_threads; i++)
                si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
 
+       sscreen->ge_wave_size = 64;
+       sscreen->ps_wave_size = 64;
+       sscreen->compute_wave_size = 64;
+
+       if (sscreen->info.chip_class >= GFX10) {
+               /* Pixel shaders: Wave64 is recommended.
+                * Compute shaders: There are piglit failures with Wave32.
+                */
+               sscreen->ge_wave_size = 32;
+
+               if (sscreen->debug_flags & DBG(W32_GE))
+                       sscreen->ge_wave_size = 32;
+               if (sscreen->debug_flags & DBG(W32_PS))
+                       sscreen->ps_wave_size = 32;
+               if (sscreen->debug_flags & DBG(W32_CS))
+                       sscreen->compute_wave_size = 32;
+
+               if (sscreen->debug_flags & DBG(W64_GE))
+                       sscreen->ge_wave_size = 64;
+               if (sscreen->debug_flags & DBG(W64_PS))
+                       sscreen->ps_wave_size = 64;
+               if (sscreen->debug_flags & DBG(W64_CS))
+                       sscreen->compute_wave_size = 64;
+       }
+
        /* Create the auxiliary context. This must be done last. */
-       sscreen->aux_context = si_create_context(
-               &sscreen->b, sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0);
+       sscreen->aux_context = si_create_context(&sscreen->b,
+               (sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) |
+               (sscreen->info.has_graphics ? 0 : PIPE_CONTEXT_COMPUTE_ONLY));
        if (sscreen->options.aux_debug) {
                struct u_log_context *log = CALLOC_STRUCT(u_log_context);
                u_log_context_init(log);