radeonsi: fix shader disk cache key

[mesa.git] / src / gallium / drivers / radeonsi / si_pipe.c
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c

index a0ad77435cb1e87e9d20adaa933807872db9c780..1be3db4602bb9a64663f205cd08ba6dbeb91de7f 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -45,6 +45,11 @@
  #include "gallium/winsys/amdgpu/drm/amdgpu_public.h"
  #include <xf86drm.h>
  
+#include <llvm/Config/llvm-config.h>
+
+static struct pipe_context *si_create_context(struct pipe_screen *screen,
+                                              unsigned flags);
+
  static const struct debug_named_value debug_options[] = {
         /* Shader logging options: */
         { "vs", DBG(VS), "Print vertex shaders" },
@@ -89,6 +94,7 @@ static const struct debug_named_value debug_options[] = {
  
         /* 3D engine options: */
         { "nogfx", DBG(NO_GFX), "Disable graphics. Only multimedia compute paths can be used." },
+       { "nongg", DBG(NO_NGG), "Disable NGG and use the legacy pipeline." },
         { "alwayspd", DBG(ALWAYS_PD), "Always enable the primitive discard compute shader." },
         { "pd", DBG(PD), "Enable the primitive discard compute shader for large draw calls." },
         { "nopd", DBG(NO_PD), "Disable the primitive discard compute shader." },
@@ -121,8 +127,7 @@ static const struct debug_named_value debug_options[] = {
         DEBUG_NAMED_VALUE_END /* must be last */
  };
  
-static void si_init_compiler(struct si_screen *sscreen,
-                            struct ac_llvm_compiler *compiler)
+void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler)
  {
         /* Only create the less-optimizing version of the compiler on APUs
          * predating Ryzen (Raven). */
@@ -173,7 +178,7 @@ static void si_destroy_context(struct pipe_context *context)
  
         si_release_all_descriptors(sctx);
  
-       if (sctx->chip_class >= GFX10)
+       if (sctx->chip_class >= GFX10 && sctx->has_graphics)
                 gfx10_destroy_query(sctx);
  
         pipe_resource_reference(&sctx->esgs_ring, NULL);
@@ -230,6 +235,15 @@ static void si_destroy_context(struct pipe_context *context)
         if (sctx->cs_dcc_retile)
                 sctx->b.delete_compute_state(&sctx->b, sctx->cs_dcc_retile);
  
+       for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_fmask_expand); i++) {
+               for (unsigned j = 0; j < ARRAY_SIZE(sctx->cs_fmask_expand[i]); j++) {
+                       if (sctx->cs_fmask_expand[i][j]) {
+                               sctx->b.delete_compute_state(&sctx->b,
+                                                            sctx->cs_fmask_expand[i][j]);
+                       }
+               }
+       }
+
         if (sctx->blitter)
                 util_blitter_destroy(sctx->blitter);
  
@@ -293,14 +307,39 @@ static void si_destroy_context(struct pipe_context *context)
         util_dynarray_fini(&sctx->resident_img_needs_color_decompress);
         util_dynarray_fini(&sctx->resident_tex_needs_depth_decompress);
         si_unref_sdma_uploads(sctx);
+       free(sctx->sdma_uploads);
         FREE(sctx);
  }
  
  static enum pipe_reset_status si_get_reset_status(struct pipe_context *ctx)
  {
         struct si_context *sctx = (struct si_context *)ctx;
+       struct si_screen *sscreen = sctx->screen;
+       enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(sctx->ctx);
+
+       if (status != PIPE_NO_RESET) {
+               /* Call the state tracker to set a no-op API dispatch. */
+               if (sctx->device_reset_callback.reset) {
+                       sctx->device_reset_callback.reset(sctx->device_reset_callback.data,
+                                                         status);
+               }
  
-       return sctx->ws->ctx_query_reset_status(sctx->ctx);
+               /* Re-create the auxiliary context, because it won't submit
+                * any new IBs due to a GPU reset.
+                */
+               simple_mtx_lock(&sscreen->aux_context_lock);
+
+               struct u_log_context *aux_log = ((struct si_context *)sscreen->aux_context)->log;
+               sscreen->aux_context->set_log_context(sscreen->aux_context, NULL);
+               sscreen->aux_context->destroy(sscreen->aux_context);
+
+               sscreen->aux_context = si_create_context(&sscreen->b,
+                       (sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) |
+                       (sscreen->info.has_graphics ? 0 : PIPE_CONTEXT_COMPUTE_ONLY));
+               sscreen->aux_context->set_log_context(sscreen->aux_context, aux_log);
+               simple_mtx_unlock(&sscreen->aux_context_lock);
+       }
+       return status;
  }
  
  static void si_set_device_reset_callback(struct pipe_context *ctx,
@@ -315,21 +354,6 @@ static void si_set_device_reset_callback(struct pipe_context *ctx,
                        sizeof(sctx->device_reset_callback));
  }
  
-bool si_check_device_reset(struct si_context *sctx)
-{
-       enum pipe_reset_status status;
-
-       if (!sctx->device_reset_callback.reset)
-               return false;
-
-       status = sctx->ws->ctx_query_reset_status(sctx->ctx);
-       if (status == PIPE_NO_RESET)
-               return false;
-
-       sctx->device_reset_callback.reset(sctx->device_reset_callback.data, status);
-       return true;
-}
-
  /* Apitrace profiling:
   *   1) qapitrace : Tools -> Profile: Measure CPU & GPU times
   *   2) In the middle panel, zoom in (mouse wheel) on some bad draw call
@@ -459,7 +483,13 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
         if (!sctx->ctx)
                 goto fail;
  
-       if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
+       if (sscreen->info.num_sdma_rings &&
+           !(sscreen->debug_flags & DBG(NO_ASYNC_DMA)) &&
+           /* SDMA sometimes times out on gfx10, so disable it for now. See:
+            *    https://bugs.freedesktop.org/show_bug.cgi?id=111481
+            *    https://gitlab.freedesktop.org/mesa/mesa/issues/1907
+            */
+           (sctx->chip_class != GFX10 || sscreen->debug_flags & DBG(FORCE_DMA))) {
                 sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
                                                    (void*)si_flush_dma_cs,
                                                    sctx, stop_exec_on_failure);
@@ -729,7 +759,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
         if (!sscreen->ws->unref(sscreen->ws))
                 return;
  
-       mtx_destroy(&sscreen->aux_context_lock);
+       simple_mtx_destroy(&sscreen->aux_context_lock);
  
         struct u_log_context *aux_log = ((struct si_context *)sscreen->aux_context)->log;
         if (aux_log) {
@@ -762,13 +792,13 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
                         FREE(part);
                 }
         }
-       mtx_destroy(&sscreen->shader_parts_mutex);
+       simple_mtx_destroy(&sscreen->shader_parts_mutex);
         si_destroy_shader_cache(sscreen);
  
         si_destroy_perfcounters(sscreen);
         si_gpu_load_kill_thread(sscreen);
  
-       mtx_destroy(&sscreen->gpu_load_mutex);
+       simple_mtx_destroy(&sscreen->gpu_load_mutex);
  
         slab_destroy_parent(&sscreen->pool_transfers);
  
@@ -873,6 +903,10 @@ static void si_disk_cache_create(struct si_screen *sscreen)
         /* These flags affect shader compilation. */
         #define ALL_FLAGS (DBG(SI_SCHED) | DBG(GISEL))
         uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS;
+       /* Reserve bit 31 for the tgsi/nir selector. Convert to uint32_t before
+        * shifting: shifting a signed int 1 into the sign bit is undefined. */
+       assert(!(shader_debug_flags & (1u << 31)));
+       shader_debug_flags |= (uint32_t)(sscreen->options.enable_nir & 0x1) << 31;
  
         /* Add the high bits of 32-bit addresses, which affects
          * how 32-bit addresses are expanded to 64 bits.
@@ -913,7 +947,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
                             const struct pipe_screen_config *config)
  {
         struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
-       unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i;
+       unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads;
  
         if (!sscreen) {
                 return NULL;
@@ -922,7 +956,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
         sscreen->ws = ws;
         ws->query_info(ws, &sscreen->info);
  
-       if (sscreen->info.chip_class == GFX10 && HAVE_LLVM < 0x0900) {
+       if (sscreen->info.chip_class == GFX10 && LLVM_VERSION_MAJOR < 9) {
                 fprintf(stderr, "radeonsi: Navi family support requires LLVM 9 or higher\n");
                 FREE(sscreen);
                 return NULL;
@@ -952,6 +986,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
                 si_set_max_shader_compiler_threads;
         sscreen->b.is_parallel_shader_compilation_finished =
                 si_is_parallel_shader_compilation_finished;
+       sscreen->b.finalize_nir = si_finalize_nir;
  
         si_init_screen_get_functions(sscreen);
         si_init_screen_buffer_functions(sscreen);
@@ -986,8 +1021,8 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
                        1 << util_logbase2(sscreen->force_aniso));
         }
  
-       (void) mtx_init(&sscreen->aux_context_lock, mtx_plain);
-       (void) mtx_init(&sscreen->gpu_load_mutex, mtx_plain);
+       (void) simple_mtx_init(&sscreen->aux_context_lock, mtx_plain);
+       (void) simple_mtx_init(&sscreen->gpu_load_mutex, mtx_plain);
  
         si_init_gs_info(sscreen);
         if (!si_init_shader_cache(sscreen)) {
@@ -995,6 +1030,13 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
                 return NULL;
         }
  
+       {
+#define OPT_BOOL(name, dflt, description) \
+               sscreen->options.name = \
+                       driQueryOptionb(config->options, "radeonsi_"#name);
+#include "si_debug_options.h"
+       }
+
         si_disk_cache_create(sscreen);
  
         /* Determine the number of shader compiler threads. */
@@ -1115,15 +1157,9 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
         sscreen->commutative_blend_add =
                 driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
  
-       {
-#define OPT_BOOL(name, dflt, description) \
-               sscreen->options.name = \
-                       driQueryOptionb(config->options, "radeonsi_"#name);
-#include "si_debug_options.h"
-       }
-
         sscreen->use_ngg = sscreen->info.chip_class >= GFX10 &&
-                          sscreen->info.family != CHIP_NAVI14;
+                          sscreen->info.family != CHIP_NAVI14 &&
+                          !(sscreen->debug_flags & DBG(NO_NGG));
         sscreen->use_ngg_streamout = false;
  
         /* Only enable primitive binning on APUs by default. */
@@ -1158,7 +1194,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
         sscreen->dcc_msaa_allowed =
                 !(sscreen->debug_flags & DBG(NO_DCC_MSAA));
  
-       (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
+       (void) simple_mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
         sscreen->use_monolithic_shaders =
                 (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
  
@@ -1197,11 +1233,6 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
                 }
         }
  
-       for (i = 0; i < num_comp_hi_threads; i++)
-               si_init_compiler(sscreen, &sscreen->compiler[i]);
-       for (i = 0; i < num_comp_lo_threads; i++)
-               si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
-
         sscreen->ge_wave_size = 64;
         sscreen->ps_wave_size = 64;
         sscreen->compute_wave_size = 64;