X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_pipe.c;h=1338d657599b0fcc9b72a1e3b779db07208ced55;hb=3b143369a55d1b79f7db14dda587e18f6b27c975;hp=6d9cce2619b6ce841d4bcc9ddc6b58037adf1b20;hpb=a6b3ca1c70d9b2bbcdea4085d4d345e344470d52;p=mesa.git

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 6d9cce2619b..1338d657599 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -45,6 +45,11 @@
 #include "gallium/winsys/amdgpu/drm/amdgpu_public.h"
 #include <xf86drm.h>
 
+#include <llvm/Config/llvm-config.h>
+
+static struct pipe_context *si_create_context(struct pipe_screen *screen,
+                                              unsigned flags);
+
 static const struct debug_named_value debug_options[] = {
 	/* Shader logging options: */
 	{ "vs", DBG(VS), "Print vertex shaders" },
@@ -59,7 +64,6 @@ static const struct debug_named_value debug_options[] = {
 	{ "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" },
 
 	/* Shader compiler options the shader cache should be aware of: */
-	{ "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" },
 	{ "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." },
 	{ "gisel", DBG(GISEL), "Enable LLVM global instruction selector." },
 	{ "w32ge", DBG(W32_GE), "Use Wave32 for vertex, tessellation, and geometry shaders." },
@@ -90,6 +94,7 @@ static const struct debug_named_value debug_options[] = {
 
 	/* 3D engine options: */
 	{ "nogfx", DBG(NO_GFX), "Disable graphics. Only multimedia compute paths can be used." },
+	{ "nongg", DBG(NO_NGG), "Disable NGG and use the legacy pipeline." },
 	{ "alwayspd", DBG(ALWAYS_PD), "Always enable the primitive discard compute shader." },
 	{ "pd", DBG(PD), "Enable the primitive discard compute shader for large draw calls." },
 	{ "nopd", DBG(NO_PD), "Disable the primitive discard compute shader." },
@@ -122,8 +127,7 @@ static const struct debug_named_value debug_options[] = {
 	DEBUG_NAMED_VALUE_END /* must be last */
 };
 
-static void si_init_compiler(struct si_screen *sscreen,
-			     struct ac_llvm_compiler *compiler)
+void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler)
 {
 	/* Only create the less-optimizing version of the compiler on APUs
 	 * predating Ryzen (Raven). */
@@ -174,7 +178,7 @@ static void si_destroy_context(struct pipe_context *context)
 
 	si_release_all_descriptors(sctx);
 
-	if (sctx->chip_class >= GFX10)
+	if (sctx->chip_class >= GFX10 && sctx->has_graphics)
 		gfx10_destroy_query(sctx);
 
 	pipe_resource_reference(&sctx->esgs_ring, NULL);
@@ -231,6 +235,15 @@ static void si_destroy_context(struct pipe_context *context)
 	if (sctx->cs_dcc_retile)
 		sctx->b.delete_compute_state(&sctx->b, sctx->cs_dcc_retile);
 
+	for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_fmask_expand); i++) {
+		for (unsigned j = 0; j < ARRAY_SIZE(sctx->cs_fmask_expand[i]); j++) {
+			if (sctx->cs_fmask_expand[i][j]) {
+				sctx->b.delete_compute_state(&sctx->b,
+							     sctx->cs_fmask_expand[i][j]);
+			}
+		}
+	}
+
 	if (sctx->blitter)
 		util_blitter_destroy(sctx->blitter);
 
@@ -294,14 +307,39 @@ static void si_destroy_context(struct pipe_context *context)
 	util_dynarray_fini(&sctx->resident_img_needs_color_decompress);
 	util_dynarray_fini(&sctx->resident_tex_needs_depth_decompress);
 	si_unref_sdma_uploads(sctx);
+	free(sctx->sdma_uploads);
 	FREE(sctx);
 }
 
 static enum pipe_reset_status si_get_reset_status(struct pipe_context *ctx)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
+	struct si_screen *sscreen = sctx->screen;
+	enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(sctx->ctx);
+
+	if (status != PIPE_NO_RESET) {
+		/* Call the state tracker to set a no-op API dispatch. */
+		if (sctx->device_reset_callback.reset) {
+			sctx->device_reset_callback.reset(sctx->device_reset_callback.data,
+							  status);
+		}
 
-	return sctx->ws->ctx_query_reset_status(sctx->ctx);
+		/* Re-create the auxiliary context, because it won't submit
+		 * any new IBs due to a GPU reset.
+		 */
+		simple_mtx_lock(&sscreen->aux_context_lock);
+
+		struct u_log_context *aux_log = ((struct si_context *)sscreen->aux_context)->log;
+		sscreen->aux_context->set_log_context(sscreen->aux_context, NULL);
+		sscreen->aux_context->destroy(sscreen->aux_context);
+
+		sscreen->aux_context = si_create_context(&sscreen->b,
+			(sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) |
+			(sscreen->info.has_graphics ? 0 : PIPE_CONTEXT_COMPUTE_ONLY));
+		sscreen->aux_context->set_log_context(sscreen->aux_context, aux_log);
+		simple_mtx_unlock(&sscreen->aux_context_lock);
+	}
+	return status;
 }
 
 static void si_set_device_reset_callback(struct pipe_context *ctx,
@@ -316,21 +354,6 @@ static void si_set_device_reset_callback(struct pipe_context *ctx,
 		       sizeof(sctx->device_reset_callback));
 }
 
-bool si_check_device_reset(struct si_context *sctx)
-{
-	enum pipe_reset_status status;
-
-	if (!sctx->device_reset_callback.reset)
-		return false;
-
-	status = sctx->ws->ctx_query_reset_status(sctx->ctx);
-	if (status == PIPE_NO_RESET)
-		return false;
-
-	sctx->device_reset_callback.reset(sctx->device_reset_callback.data, status);
-	return true;
-}
-
 /* Apitrace profiling:
  *   1) qapitrace : Tools -> Profile: Measure CPU & GPU times
  *   2) In the middle panel, zoom in (mouse wheel) on some bad draw call
@@ -460,7 +483,13 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	if (!sctx->ctx)
 		goto fail;
 
-	if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
+	if (sscreen->info.num_rings[RING_DMA] &&
+	    !(sscreen->debug_flags & DBG(NO_ASYNC_DMA)) &&
+	    /* SDMA sometimes times out on gfx10, so disable it for now. See:
+	     *    https://bugs.freedesktop.org/show_bug.cgi?id=111481
+	     *    https://gitlab.freedesktop.org/mesa/mesa/issues/1907
+	     */
+	    (sctx->chip_class != GFX10 || sscreen->debug_flags & DBG(FORCE_DMA))) {
 		sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
 						   (void*)si_flush_dma_cs,
 						   sctx, stop_exec_on_failure);
@@ -730,7 +759,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
 	if (!sscreen->ws->unref(sscreen->ws))
 		return;
 
-	mtx_destroy(&sscreen->aux_context_lock);
+	simple_mtx_destroy(&sscreen->aux_context_lock);
 
 	struct u_log_context *aux_log = ((struct si_context *)sscreen->aux_context)->log;
 	if (aux_log) {
@@ -744,6 +773,9 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
 	util_queue_destroy(&sscreen->shader_compiler_queue);
 	util_queue_destroy(&sscreen->shader_compiler_queue_low_priority);
 
+	/* Release the reference on glsl types of the compiler threads. */
+	glsl_type_singleton_decref();
+
 	for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++)
 		si_destroy_compiler(&sscreen->compiler[i]);
 
@@ -760,13 +792,13 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
 			FREE(part);
 		}
 	}
-	mtx_destroy(&sscreen->shader_parts_mutex);
+	simple_mtx_destroy(&sscreen->shader_parts_mutex);
 	si_destroy_shader_cache(sscreen);
 
 	si_destroy_perfcounters(sscreen);
 	si_gpu_load_kill_thread(sscreen);
 
-	mtx_destroy(&sscreen->gpu_load_mutex);
+	simple_mtx_destroy(&sscreen->gpu_load_mutex);
 
 	slab_destroy_parent(&sscreen->pool_transfers);
 
@@ -869,22 +901,12 @@ static void si_disk_cache_create(struct si_screen *sscreen)
 	disk_cache_format_hex_id(cache_id, sha1, 20 * 2);
 
 	/* These flags affect shader compilation. */
-	#define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |	\
-			   DBG(SI_SCHED) |			\
-			   DBG(GISEL) |				\
-			   DBG(UNSAFE_MATH) |			\
-			   DBG(W32_GE) |			\
-			   DBG(W32_PS) |			\
-			   DBG(W32_CS) |			\
-			   DBG(W64_GE) |			\
-			   DBG(W64_PS) |			\
-			   DBG(W64_CS))
+	#define ALL_FLAGS (DBG(SI_SCHED) | DBG(GISEL))
 	uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS;
-
-	if (sscreen->options.enable_nir) {
-		STATIC_ASSERT((ALL_FLAGS & (1u << 31)) == 0);
-		shader_debug_flags |= 1u << 31;
-	}
+	/* Reserve the left-most bit (31) for the tgsi/nir selector. Shift in
+	 * uint32_t so that the bit is never shifted into a signed int. */
+	assert(!(shader_debug_flags & (1u << 31)));
+	shader_debug_flags |= (uint32_t)(sscreen->options.enable_nir & 0x1) << 31;
 
 	/* Add the high bits of 32-bit addresses, which affects
 	 * how 32-bit addresses are expanded to 64 bits.
@@ -925,7 +947,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 			    const struct pipe_screen_config *config)
 {
 	struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
-	unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i;
+	unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads;
 
 	if (!sscreen) {
 		return NULL;
@@ -934,7 +956,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 	sscreen->ws = ws;
 	ws->query_info(ws, &sscreen->info);
 
-	if (sscreen->info.chip_class == GFX10 && HAVE_LLVM < 0x0900) {
+	if (sscreen->info.chip_class == GFX10 && LLVM_VERSION_MAJOR < 9) {
 		fprintf(stderr, "radeonsi: Navi family support requires LLVM 9 or higher\n");
 		FREE(sscreen);
 		return NULL;
@@ -964,6 +986,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 		si_set_max_shader_compiler_threads;
 	sscreen->b.is_parallel_shader_compilation_finished =
 		si_is_parallel_shader_compilation_finished;
+	sscreen->b.finalize_nir = si_finalize_nir;
 
 	si_init_screen_get_functions(sscreen);
 	si_init_screen_buffer_functions(sscreen);
@@ -998,8 +1021,8 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 		       1 << util_logbase2(sscreen->force_aniso));
 	}
 
-	(void) mtx_init(&sscreen->aux_context_lock, mtx_plain);
-	(void) mtx_init(&sscreen->gpu_load_mutex, mtx_plain);
+	(void) simple_mtx_init(&sscreen->aux_context_lock, mtx_plain);
+	(void) simple_mtx_init(&sscreen->gpu_load_mutex, mtx_plain);
 
 	si_init_gs_info(sscreen);
 	if (!si_init_shader_cache(sscreen)) {
@@ -1007,6 +1030,13 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 		return NULL;
 	}
 
+	{
+#define OPT_BOOL(name, dflt, description) \
+		sscreen->options.name = \
+			driQueryOptionb(config->options, "radeonsi_"#name);
+#include "si_debug_options.h"
+	}
+
 	si_disk_cache_create(sscreen);
 
 	/* Determine the number of shader compiler threads. */
@@ -1031,12 +1061,16 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 	num_comp_lo_threads = MIN2(num_comp_lo_threads,
 				   ARRAY_SIZE(sscreen->compiler_lowp));
 
+	/* Take a reference on the glsl types for the compiler threads. */
+	glsl_type_singleton_init_or_ref();
+
 	if (!util_queue_init(&sscreen->shader_compiler_queue, "sh",
 			     64, num_comp_hi_threads,
 			     UTIL_QUEUE_INIT_RESIZE_IF_FULL |
 			     UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) {
 		si_destroy_shader_cache(sscreen);
 		FREE(sscreen);
+		glsl_type_singleton_decref();
 		return NULL;
 	}
 
@@ -1048,6 +1082,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 			     UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
 	       si_destroy_shader_cache(sscreen);
 	       FREE(sscreen);
+	       glsl_type_singleton_decref();
 	       return NULL;
 	}
 
@@ -1103,16 +1138,6 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
 	}
 
-	/* The mere presense of CLEAR_STATE in the IB causes random GPU hangs
-	 * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
-	 * SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel. */
-	sscreen->has_clear_state = sscreen->info.chip_class >= GFX7 &&
-				   sscreen->info.is_amdgpu;
-
-	sscreen->has_distributed_tess =
-		sscreen->info.chip_class >= GFX8 &&
-		sscreen->info.max_se >= 2;
-
 	sscreen->has_draw_indirect_multi =
 		(sscreen->info.family >= CHIP_POLARIS10) ||
 		(sscreen->info.chip_class == GFX8 &&
@@ -1125,34 +1150,17 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 		 sscreen->info.pfp_fw_version >= 79 &&
 		 sscreen->info.me_fw_version >= 142);
 
-	sscreen->has_out_of_order_rast = sscreen->info.chip_class >= GFX8 &&
-					 sscreen->info.max_se >= 2 &&
+	sscreen->has_out_of_order_rast = sscreen->info.has_out_of_order_rast &&
 					 !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
 	sscreen->assume_no_z_fights =
 		driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
 	sscreen->commutative_blend_add =
 		driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
 
-	{
-#define OPT_BOOL(name, dflt, description) \
-		sscreen->options.name = \
-			driQueryOptionb(config->options, "radeonsi_"#name);
-#include "si_debug_options.h"
-	}
-
-	sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 ||
-					sscreen->info.family == CHIP_RAVEN;
-	sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
-					    sscreen->info.family <= CHIP_POLARIS12) ||
-					   sscreen->info.family == CHIP_VEGA10 ||
-					   sscreen->info.family == CHIP_RAVEN;
-	sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
-					sscreen->info.family == CHIP_RAVEN;
-	sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2 ||
-					   sscreen->info.family == CHIP_RENOIR ||
-					   sscreen->info.chip_class >= GFX10;
-	sscreen->use_ngg = sscreen->info.chip_class >= GFX10;
-	sscreen->use_ngg_streamout = sscreen->info.chip_class >= GFX10;
+	sscreen->use_ngg = sscreen->info.chip_class >= GFX10 &&
+			   sscreen->info.family != CHIP_NAVI14 &&
+			   !(sscreen->debug_flags & DBG(NO_NGG));
+	sscreen->use_ngg_streamout = false;
 
 	/* Only enable primitive binning on APUs by default. */
 	if (sscreen->info.chip_class >= GFX10) {
@@ -1183,28 +1191,10 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 	 */
 	sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class != GFX9;
 
-	/* Some chips have RB+ registers, but don't support RB+. Those must
-	 * always disable it.
-	 */
-	if (sscreen->info.family == CHIP_STONEY ||
-	    sscreen->info.chip_class >= GFX9) {
-		sscreen->has_rbplus = true;
-
-		sscreen->rbplus_allowed =
-			!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
-			(sscreen->info.family == CHIP_STONEY ||
-			 sscreen->info.family == CHIP_VEGA12 ||
-			 sscreen->info.family == CHIP_RAVEN ||
-			 sscreen->info.family == CHIP_RAVEN2 ||
-			 sscreen->info.family == CHIP_RENOIR);
-	}
-
 	sscreen->dcc_msaa_allowed =
 		!(sscreen->debug_flags & DBG(NO_DCC_MSAA));
 
-	sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= GFX8;
-
-	(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
+	(void) simple_mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
 	sscreen->use_monolithic_shaders =
 		(sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
 
@@ -1243,11 +1233,6 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 		}
 	}
 
-	for (i = 0; i < num_comp_hi_threads; i++)
-		si_init_compiler(sscreen, &sscreen->compiler[i]);
-	for (i = 0; i < num_comp_lo_threads; i++)
-		si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
-
 	sscreen->ge_wave_size = 64;
 	sscreen->ps_wave_size = 64;
 	sscreen->compute_wave_size = 64;