From 420fe1e7f9ef56177c8f45e98e057488a2b57646 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Fri, 3 Jan 2020 23:15:27 -0500
Subject: [PATCH] radeonsi: remove TGSI

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
 src/gallium/drivers/radeonsi/Makefile.sources |    5 +-
 src/gallium/drivers/radeonsi/meson.build      |    5 +-
 src/gallium/drivers/radeonsi/si_compute.c     |   31 +-
 .../drivers/radeonsi/si_debug_options.h       |    1 -
 src/gallium/drivers/radeonsi/si_get.c         |   27 +-
 src/gallium/drivers/radeonsi/si_pipe.c        |    6 +-
 src/gallium/drivers/radeonsi/si_shader.c      | 1384 +-----------
 src/gallium/drivers/radeonsi/si_shader.h      |    9 +-
 .../drivers/radeonsi/si_shader_internal.h     |  104 +-
 src/gallium/drivers/radeonsi/si_shader_llvm.c |  239 +++
 .../drivers/radeonsi/si_shader_llvm_build.c   |  219 ++
 .../drivers/radeonsi/si_shader_tgsi_alu.c     |  834 --------
 .../drivers/radeonsi/si_shader_tgsi_mem.c     | 1852 -----------------
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 1165 -----------
 .../drivers/radeonsi/si_state_shaders.c       |   57 +-
 src/util/00-mesa-defaults.conf                |    6 -
 16 files changed, 598 insertions(+), 5346 deletions(-)
 create mode 100644 src/gallium/drivers/radeonsi/si_shader_llvm.c
 create mode 100644 src/gallium/drivers/radeonsi/si_shader_llvm_build.c
 delete mode 100644 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
 delete mode 100644 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
 delete mode 100644 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index 886aaf6fa34..5d658b744d0 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -35,10 +35,9 @@ C_SOURCES := \
 	si_shader.c \
 	si_shader.h \
 	si_shader_internal.h \
+	si_shader_llvm.c \
+	si_shader_llvm_build.c \
 	si_shader_nir.c \
-	si_shader_tgsi_alu.c \
-	si_shader_tgsi_mem.c \
-	si_shader_tgsi_setup.c \
 	si_shaderlib_tgsi.c \
 	si_state.c \
 	si_state_binning.c \
diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build
index d2d3dd684b0..a0bd10f6ac9 100644
--- a/src/gallium/drivers/radeonsi/meson.build
+++ b/src/gallium/drivers/radeonsi/meson.build
@@ -50,10 +50,9 @@ files_libradeonsi = files(
   'si_shader.c',
   'si_shader.h',
   'si_shader_internal.h',
+  'si_shader_llvm.c',
+  'si_shader_llvm_build.c',
   'si_shader_nir.c',
-  'si_shader_tgsi_alu.c',
-  'si_shader_tgsi_mem.c',
-  'si_shader_tgsi_setup.c',
   'si_shaderlib_tgsi.c',
   'si_state.c',
   'si_state.h',
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 7abea1927cd..f264b880d29 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -24,7 +24,6 @@
  */
 
 #include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_parse.h"
 #include "util/u_async_debug.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
@@ -124,13 +123,8 @@ static void si_create_compute_state_async(void *job, int thread_index)
 	if (!compiler->passes)
 		si_init_compiler(sscreen, compiler);
 
-	if (program->ir_type == PIPE_SHADER_IR_TGSI) {
-		tgsi_scan_shader(sel->tokens, &sel->info);
-	} else {
-		assert(program->ir_type == PIPE_SHADER_IR_NIR);
-
-		si_nir_scan_shader(sel->nir, &sel->info);
-	}
+	assert(program->ir_type == PIPE_SHADER_IR_NIR);
+	si_nir_scan_shader(sel->nir, &sel->info);
 
 	/* Store the declared LDS size into tgsi_shader_info for the shader
 	 * cache to include it.
@@ -167,9 +161,6 @@ static void si_create_compute_state_async(void *job, int thread_index)
 
 		if (!si_shader_create(sscreen, compiler, &program->shader, debug)) {
 			program->shader.compilation_failed = true;
-
-			if (program->ir_type == PIPE_SHADER_IR_TGSI)
-				FREE(sel->tokens);
 			return;
 		}
 
@@ -209,8 +200,6 @@ static void si_create_compute_state_async(void *job, int thread_index)
 		simple_mtx_unlock(&sscreen->shader_cache_mutex);
 	}
 
-	FREE(sel->tokens);
-	sel->tokens = NULL;
 	ralloc_free(sel->nir);
 	sel->nir = NULL;
 }
@@ -234,16 +223,9 @@ static void *si_create_compute_state(
 	program->input_size = cso->req_input_mem;
 
 	if (cso->ir_type != PIPE_SHADER_IR_NATIVE) {
-		if (sscreen->options.enable_nir &&
-		    cso->ir_type == PIPE_SHADER_IR_TGSI) {
+		if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
 			program->ir_type = PIPE_SHADER_IR_NIR;
 			sel->nir = tgsi_to_nir(cso->prog, ctx->screen);
-		} else if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
-			sel->tokens = tgsi_dup_tokens(cso->prog);
-			if (!sel->tokens) {
-				FREE(program);
-				return NULL;
-			}
 		} else {
 			assert(cso->ir_type == PIPE_SHADER_IR_NIR);
 			sel->nir = (struct nir_shader *) cso->prog;
@@ -719,8 +701,8 @@ static bool si_upload_compute_input(struct si_context *sctx,
 	return true;
 }
 
-static void si_setup_tgsi_user_data(struct si_context *sctx,
-                                const struct pipe_grid_info *info)
+static void si_setup_nir_user_data(struct si_context *sctx,
+				   const struct pipe_grid_info *info)
 {
 	struct si_compute *program = sctx->cs_shader_state.program;
 	struct si_shader_selector *sel = &program->sel;
@@ -944,7 +926,7 @@ static void si_launch_grid(
 	}
 
 	if (program->ir_type != PIPE_SHADER_IR_NATIVE)
-		si_setup_tgsi_user_data(sctx, info);
+		si_setup_nir_user_data(sctx, info);
 
 	si_emit_dispatch_packets(sctx, info);
 
@@ -977,7 +959,6 @@ void si_destroy_compute(struct si_compute *program)
 	FREE(program->global_buffers);
 
 	si_shader_destroy(&program->shader);
-	FREE(program->sel.tokens);
 	ralloc_free(program->sel.nir);
 	FREE(program);
 }
diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h
index 7ba835acf84..9a0dd0c9f78 100644
--- a/src/gallium/drivers/radeonsi/si_debug_options.h
+++ b/src/gallium/drivers/radeonsi/si_debug_options.h
@@ -1,5 +1,4 @@
 OPT_BOOL(clear_db_cache_before_clear, false, "Clear DB cache before fast depth clear")
-OPT_BOOL(enable_nir, true, "Enable NIR")
 OPT_BOOL(aux_debug, false, "Generate ddebug_dumps for the auxiliary context")
 OPT_BOOL(sync_compile, false, "Always compile synchronously (will cause stalls)")
 OPT_BOOL(dump_shader_binary, false, "Dump shader binary as part of ddebug_dumps")
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index c34c8649bcf..1adbafda53a 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -159,6 +159,9 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_IMAGE_LOAD_FORMATTED:
 	case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA:
 	case PIPE_CAP_TGSI_DIV:
+	case PIPE_CAP_PACKED_UNIFORMS:
+	case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
+	case PIPE_CAP_GL_SPIRV:
 		return 1;
 
 	case PIPE_CAP_QUERY_SO_OVERFLOW:
@@ -195,7 +198,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
 		if (!sscreen->info.has_indirect_compute_dispatch)
 			return 420;
-		return sscreen->options.enable_nir ? 460 : 450;
+		return 460;
 
 	case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
 		/* Optimal number for good TexSubImage performance on Polaris10. */
@@ -214,15 +217,6 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 		return sscreen->info.has_sparse_vm_mappings ?
 				RADEON_SPARSE_PAGE_SIZE : 0;
 
-	case PIPE_CAP_PACKED_UNIFORMS:
-	case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
-	case PIPE_CAP_GL_SPIRV:
-		return sscreen->options.enable_nir;
-
-	case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
-		if (sscreen->options.enable_nir)
-			return 0;
-		return 1;
 
 	/* Unsupported features. */
 	case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
@@ -246,6 +240,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
 	case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS:
 	case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
+	case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
 		return 0;
 
 	case PIPE_CAP_FENCE_SIGNAL:
@@ -395,14 +390,14 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
 			int ir = 1 << PIPE_SHADER_IR_NATIVE;
 
 			if (sscreen->info.has_indirect_compute_dispatch)
-				ir |= 1 << PIPE_SHADER_IR_TGSI;
+				ir |= 1 << PIPE_SHADER_IR_NIR;
 
 			return ir;
 		}
 
 		case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
 			uint64_t max_const_buffer_size;
-			pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
+			pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_NIR,
 				PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
 				&max_const_buffer_size);
 			return MIN2(max_const_buffer_size, INT_MAX);
@@ -444,13 +439,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
 	case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
 		return SI_NUM_IMAGES;
 	case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
-		if (sscreen->options.enable_nir)
-			return 0;
-		return 32;
+		return 0;
 	case PIPE_SHADER_CAP_PREFERRED_IR:
-		if (sscreen->options.enable_nir)
-			return PIPE_SHADER_IR_NIR;
-		return PIPE_SHADER_IR_TGSI;
+		return PIPE_SHADER_IR_NIR;
 	case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
 		return 4;
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 2e3232d1cf0..755c768fb0b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -660,7 +660,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	}
 
 	uint64_t max_threads_per_block;
-	screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
+	screen->get_compute_param(screen, PIPE_SHADER_IR_NIR,
 				  PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
 				  &max_threads_per_block);
 
@@ -910,10 +910,6 @@ static void si_disk_cache_create(struct si_screen *sscreen)
 	/* These flags affect shader compilation. */
 	#define ALL_FLAGS (DBG(SI_SCHED) | DBG(GISEL))
 	uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS;
-	/* Reserve left-most bit for tgsi/nir selector */
-	assert(!(shader_debug_flags & (1u << 31)));
-	shader_debug_flags |= (uint32_t)
-		((sscreen->options.enable_nir & 0x1) << 31);
 
 	/* Add the high bits of 32-bit addresses, which affects
 	 * how 32-bit addresses are expanded to 64 bits.
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index e6678e026cd..65a070b4570 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -25,14 +25,9 @@
 #include <llvm/Config/llvm-config.h>
 
 #include "util/u_memory.h"
-#include "util/u_string.h"
-#include "tgsi/tgsi_build.h"
 #include "tgsi/tgsi_strings.h"
-#include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_from_mesa.h"
 
-#include "ac_binary.h"
 #include "ac_exp_param.h"
 #include "ac_shader_util.h"
 #include "ac_rtld.h"
@@ -50,15 +45,7 @@ static const char scratch_rsrc_dword0_symbol[] =
 static const char scratch_rsrc_dword1_symbol[] =
 	"SCRATCH_RSRC_DWORD1";
 
-static void si_init_shader_ctx(struct si_shader_context *ctx,
-			       struct si_screen *sscreen,
-			       struct ac_llvm_compiler *compiler,
-			       unsigned wave_size,
-			       bool nir);
-
-static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
-				 struct lp_build_tgsi_context *bld_base,
-				 struct lp_build_emit_data *emit_data);
+static void si_llvm_emit_barrier(struct si_shader_context *ctx);
 
 static void si_dump_shader_key(const struct si_shader *shader, FILE *f);
 
@@ -596,15 +583,6 @@ void si_llvm_load_input_vs(
 		out[i] = ac_to_float(&ctx->ac, fetches[i]);
 }
 
-static void declare_input_vs(
-	struct si_shader_context *ctx,
-	unsigned input_index,
-	const struct tgsi_full_declaration *decl,
-	LLVMValueRef out[4])
-{
-	si_llvm_load_input_vs(ctx, input_index, out);
-}
-
 LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
 				 unsigned swizzle)
 {
@@ -626,53 +604,6 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
 	}
 }
 
-/**
- * Return the value of tgsi_ind_register for indexing.
- * This is the indirect index with the constant offset added to it.
- */
-LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
-				   const struct tgsi_ind_register *ind,
-				   unsigned addr_mul,
-				   int rel_index)
-{
-	LLVMValueRef result;
-
-	if (ind->File == TGSI_FILE_ADDRESS) {
-		result = ctx->addrs[ind->Index][ind->Swizzle];
-		result = LLVMBuildLoad(ctx->ac.builder, result, "");
-	} else {
-		struct tgsi_full_src_register src = {};
-
-		src.Register.File = ind->File;
-		src.Register.Index = ind->Index;
-
-		/* Set the second index to 0 for constants. */
-		if (ind->File == TGSI_FILE_CONSTANT)
-			src.Register.Dimension = 1;
-
-		result = ctx->bld_base.emit_fetch_funcs[ind->File](&ctx->bld_base, &src,
-								   TGSI_TYPE_SIGNED,
-								   ind->Swizzle);
-		result = ac_to_integer(&ctx->ac, result);
-	}
-
-	return ac_build_imad(&ctx->ac, result, LLVMConstInt(ctx->i32, addr_mul, 0),
-			     LLVMConstInt(ctx->i32, rel_index, 0));
-}
-
-/**
- * Like si_get_indirect_index, but restricts the return value to a (possibly
- * undefined) value inside [0..num).
- */
-LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
-					   const struct tgsi_ind_register *ind,
-					   int rel_index, unsigned num)
-{
-	LLVMValueRef result = si_get_indirect_index(ctx, ind, 1, rel_index);
-
-	return si_llvm_bound_index(ctx, result, num);
-}
-
 static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context *ctx,
 							LLVMValueRef vertex_dw_stride,
 							LLVMValueRef base_addr,
@@ -701,78 +632,6 @@ static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context
 			    LLVMConstInt(ctx->i32, param * 4, 0), "");
 }
 
-/**
- * Calculate a dword address given an input or output register and a stride.
- */
-static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
-				   const struct tgsi_full_dst_register *dst,
-				   const struct tgsi_full_src_register *src,
-				   LLVMValueRef vertex_dw_stride,
-				   LLVMValueRef base_addr)
-{
-	struct tgsi_shader_info *info = &ctx->shader->selector->info;
-	ubyte *name, *index, *array_first;
-	int input_index;
-	struct tgsi_full_dst_register reg;
-	LLVMValueRef vertex_index = NULL;
-	LLVMValueRef ind_index = NULL;
-
-	/* Set the register description. The address computation is the same
-	 * for sources and destinations. */
-	if (src) {
-		reg.Register.File = src->Register.File;
-		reg.Register.Index = src->Register.Index;
-		reg.Register.Indirect = src->Register.Indirect;
-		reg.Register.Dimension = src->Register.Dimension;
-		reg.Indirect = src->Indirect;
-		reg.Dimension = src->Dimension;
-		reg.DimIndirect = src->DimIndirect;
-	} else
-		reg = *dst;
-
-	/* If the register is 2-dimensional (e.g. an array of vertices
-	 * in a primitive), calculate the base address of the vertex. */
-	if (reg.Register.Dimension) {
-		if (reg.Dimension.Indirect)
-			vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect,
-						      1, reg.Dimension.Index);
-		else
-			vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
-	}
-
-	/* Get information about the register. */
-	if (reg.Register.File == TGSI_FILE_INPUT) {
-		name = info->input_semantic_name;
-		index = info->input_semantic_index;
-		array_first = info->input_array_first;
-	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
-		name = info->output_semantic_name;
-		index = info->output_semantic_index;
-		array_first = info->output_array_first;
-	} else {
-		assert(0);
-		return NULL;
-	}
-
-	if (reg.Register.Indirect) {
-		/* Add the relative address of the element. */
-		if (reg.Indirect.ArrayID)
-			input_index = array_first[reg.Indirect.ArrayID];
-		else
-			input_index = reg.Register.Index;
-
-		ind_index = si_get_indirect_index(ctx, &reg.Indirect,
-						  1, reg.Register.Index - input_index);
-	} else {
-		input_index = reg.Register.Index;
-	}
-
-	return get_dw_address_from_generic_indices(ctx, vertex_dw_stride,
-						   base_addr, vertex_index,
-						   ind_index, name[input_index],
-						   index[input_index]);
-}
-
 /* The offchip buffer layout for TCS->TES is
  *
  * - attribute 0 of patch 0 vertex 0
@@ -854,65 +713,24 @@ static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(
 					  vertex_index, param_index);
 }
 
-static LLVMValueRef get_tcs_tes_buffer_address_from_reg(
-                                       struct si_shader_context *ctx,
-                                       const struct tgsi_full_dst_register *dst,
-                                       const struct tgsi_full_src_register *src)
+static LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx,
+					  LLVMTypeRef type,
+					  LLVMValueRef val1,
+					  LLVMValueRef val2)
 {
-	struct tgsi_shader_info *info = &ctx->shader->selector->info;
-	ubyte *name, *index, *array_first;
-	struct tgsi_full_src_register reg;
-	LLVMValueRef vertex_index = NULL;
-	LLVMValueRef param_index = NULL;
-	unsigned param_base;
-
-	reg = src ? *src : tgsi_full_src_register_from_dst(dst);
-
-	if (reg.Register.Dimension) {
-		if (reg.Dimension.Indirect)
-			vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect,
-							     1, reg.Dimension.Index);
-		else
-			vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
-	}
-
-	/* Get information about the register. */
-	if (reg.Register.File == TGSI_FILE_INPUT) {
-		name = info->input_semantic_name;
-		index = info->input_semantic_index;
-		array_first = info->input_array_first;
-	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
-		name = info->output_semantic_name;
-		index = info->output_semantic_index;
-		array_first = info->output_array_first;
-	} else {
-		assert(0);
-		return NULL;
-	}
-
-	if (reg.Register.Indirect) {
-		if (reg.Indirect.ArrayID)
-			param_base = array_first[reg.Indirect.ArrayID];
-		else
-			param_base = reg.Register.Index;
-
-		param_index = si_get_indirect_index(ctx, &reg.Indirect,
-						    1, reg.Register.Index - param_base);
-	} else {
-		param_base = reg.Register.Index;
-	}
-
-	return get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
-							       param_index, name[param_base],
-							       index[param_base]);
+	LLVMValueRef values[2] = {
+		ac_to_integer(&ctx->ac, val1),
+		ac_to_integer(&ctx->ac, val2),
+	};
+	LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
+	return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
 }
 
-static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
+static LLVMValueRef buffer_load(struct si_shader_context *ctx,
                                 LLVMTypeRef type, unsigned swizzle,
                                 LLVMValueRef buffer, LLVMValueRef offset,
                                 LLVMValueRef base, bool can_speculate)
 {
-	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMValueRef value, value2;
 	LLVMTypeRef vec_type = LLVMVectorType(type, 4);
 
@@ -938,7 +756,7 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
 	value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
 	                           swizzle * 4 + 4, ac_glc, can_speculate, false);
 
-	return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
+	return si_build_gather_64bit(ctx, type, value, value2);
 }
 
 /**
@@ -948,30 +766,28 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
  * \param swizzle	offset (typically 0..3); it can be ~0, which loads a vec4
  * \param dw_addr	address in dwords
  */
-static LLVMValueRef lshs_lds_load(struct lp_build_tgsi_context *bld_base,
-			     LLVMTypeRef type, unsigned swizzle,
-			     LLVMValueRef dw_addr)
+static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx,
+				  LLVMTypeRef type, unsigned swizzle,
+				  LLVMValueRef dw_addr)
 {
-	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMValueRef value;
 
 	if (swizzle == ~0) {
-		LLVMValueRef values[TGSI_NUM_CHANNELS];
+		LLVMValueRef values[4];
 
-		for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
-			values[chan] = lshs_lds_load(bld_base, type, chan, dw_addr);
+		for (unsigned chan = 0; chan < 4; chan++)
+			values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
 
-		return ac_build_gather_values(&ctx->ac, values,
-					      TGSI_NUM_CHANNELS);
+		return ac_build_gather_values(&ctx->ac, values, 4);
 	}
 
 	/* Split 64-bit loads. */
 	if (llvm_type_is_64bit(ctx, type)) {
 		LLVMValueRef lo, hi;
 
-		lo = lshs_lds_load(bld_base, ctx->i32, swizzle, dw_addr);
-		hi = lshs_lds_load(bld_base, ctx->i32, swizzle + 1, dw_addr);
-		return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
+		lo = lshs_lds_load(ctx, ctx->i32, swizzle, dw_addr);
+		hi = lshs_lds_load(ctx, ctx->i32, swizzle + 1, dw_addr);
+		return si_build_gather_64bit(ctx, type, lo, hi);
 	}
 
 	dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
@@ -1049,21 +865,6 @@ static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx,
 	return ac_build_gather_values(&ctx->ac, desc, 4);
 }
 
-static LLVMValueRef fetch_input_tcs(
-	struct lp_build_tgsi_context *bld_base,
-	const struct tgsi_full_src_register *reg,
-	enum tgsi_opcode_type type, unsigned swizzle_in)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef dw_addr, stride;
-	unsigned swizzle = swizzle_in & 0xffff;
-	stride = get_tcs_in_vertex_dw_stride(ctx);
-	dw_addr = get_tcs_in_current_patch_offset(ctx);
-	dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
-
-	return lshs_lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr);
-}
-
 static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
 					     LLVMTypeRef type,
 					     LLVMValueRef vertex_index,
@@ -1079,7 +880,6 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
 {
 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
 	struct tgsi_shader_info *info = &ctx->shader->selector->info;
-	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
 	LLVMValueRef dw_addr, stride;
 	ubyte name, index;
 
@@ -1125,49 +925,12 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
 			offset *= 2;
 
 		offset += component;
-		value[i + component] = lshs_lds_load(bld_base, type, offset, dw_addr);
+		value[i + component] = lshs_lds_load(ctx, type, offset, dw_addr);
 	}
 
 	return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
 }
 
-static LLVMValueRef fetch_output_tcs(
-		struct lp_build_tgsi_context *bld_base,
-		const struct tgsi_full_src_register *reg,
-		enum tgsi_opcode_type type, unsigned swizzle_in)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef dw_addr, stride;
-	unsigned swizzle = (swizzle_in & 0xffff);
-
-	if (reg->Register.Dimension) {
-		stride = get_tcs_out_vertex_dw_stride(ctx);
-		dw_addr = get_tcs_out_current_patch_offset(ctx);
-		dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
-	} else {
-		dw_addr = get_tcs_out_current_patch_data_offset(ctx);
-		dw_addr = get_dw_address(ctx, NULL, reg, NULL, dw_addr);
-	}
-
-	return lshs_lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr);
-}
-
-static LLVMValueRef fetch_input_tes(
-	struct lp_build_tgsi_context *bld_base,
-	const struct tgsi_full_src_register *reg,
-	enum tgsi_opcode_type type, unsigned swizzle_in)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef base, addr;
-	unsigned swizzle = (swizzle_in & 0xffff);
-
-	base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
-	addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
-
-	return buffer_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle,
-			   ctx->tess_offchip_ring, base, addr, true);
-}
-
 LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
 				   LLVMTypeRef type,
 				   LLVMValueRef vertex_index,
@@ -1226,110 +989,13 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
 		}
 
 		offset += component;
-		value[i + component] = buffer_load(&ctx->bld_base, type, offset,
+		value[i + component] = buffer_load(ctx, type, offset,
 						   ctx->tess_offchip_ring, base, addr, true);
 	}
 
 	return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
 }
 
-static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
-			     const struct tgsi_full_instruction *inst,
-			     const struct tgsi_opcode_info *info,
-			     unsigned index,
-			     LLVMValueRef dst[4])
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
-	const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
-	unsigned chan_index;
-	LLVMValueRef dw_addr, stride;
-	LLVMValueRef buffer, base, buf_addr;
-	LLVMValueRef values[4];
-	bool skip_lds_store;
-	bool is_tess_factor = false, is_tess_inner = false;
-
-	/* Only handle per-patch and per-vertex outputs here.
-	 * Vectors will be lowered to scalars and this function will be called again.
-	 */
-	if (reg->Register.File != TGSI_FILE_OUTPUT ||
-	    (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
-		si_llvm_emit_store(bld_base, inst, info, index, dst);
-		return;
-	}
-
-	if (reg->Register.Dimension) {
-		stride = get_tcs_out_vertex_dw_stride(ctx);
-		dw_addr = get_tcs_out_current_patch_offset(ctx);
-		dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
-		skip_lds_store = !sh_info->reads_pervertex_outputs;
-	} else {
-		dw_addr = get_tcs_out_current_patch_data_offset(ctx);
-		dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr);
-		skip_lds_store = !sh_info->reads_perpatch_outputs;
-
-		if (!reg->Register.Indirect) {
-			int name = sh_info->output_semantic_name[reg->Register.Index];
-
-			/* Always write tess factors into LDS for the TCS epilog. */
-			if (name == TGSI_SEMANTIC_TESSINNER ||
-			    name == TGSI_SEMANTIC_TESSOUTER) {
-				/* The epilog doesn't read LDS if invocation 0 defines tess factors. */
-				skip_lds_store = !sh_info->reads_tessfactor_outputs &&
-						 ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs;
-				is_tess_factor = true;
-				is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
-			}
-		}
-	}
-
-	buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
-
-	base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
-	buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL);
-
-	uint32_t writemask = reg->Register.WriteMask;
-	while (writemask) {
-		chan_index = u_bit_scan(&writemask);
-		LLVMValueRef value = dst[chan_index];
-
-		if (inst->Instruction.Saturate)
-			value = ac_build_clamp(&ctx->ac, value);
-
-		/* Skip LDS stores if there is no LDS read of this output. */
-		if (!skip_lds_store)
-			lshs_lds_store(ctx, chan_index, dw_addr, value);
-
-		value = ac_to_integer(&ctx->ac, value);
-		values[chan_index] = value;
-
-		if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
-			ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
-						    buf_addr, base,
-						    4 * chan_index, ac_glc);
-		}
-
-		/* Write tess factors into VGPRs for the epilog. */
-		if (is_tess_factor &&
-		    ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
-			if (!is_tess_inner) {
-				LLVMBuildStore(ctx->ac.builder, value, /* outer */
-					       ctx->invoc0_tess_factors[chan_index]);
-			} else if (chan_index < 2) {
-				LLVMBuildStore(ctx->ac.builder, value, /* inner */
-					       ctx->invoc0_tess_factors[4 + chan_index]);
-			}
-		}
-	}
-
-	if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
-		LLVMValueRef value = ac_build_gather_values(&ctx->ac,
-		                                            values, 4);
-		ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
-					    base, 0, ac_glc);
-	}
-}
-
 static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
 				    const struct nir_variable *var,
 				    LLVMValueRef vertex_index,
@@ -1452,14 +1118,13 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
 	}
 }
 
-LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
-				   unsigned input_index,
-				   unsigned vtx_offset_param,
-				   LLVMTypeRef type,
-				   unsigned swizzle)
+static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
+					  unsigned input_index,
+					  unsigned vtx_offset_param,
+					  LLVMTypeRef type,
+					  unsigned swizzle)
 {
 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
 	struct si_shader *shader = ctx->shader;
 	LLVMValueRef vtx_offset, soffset;
 	struct tgsi_shader_info *info = &shader->selector->info;
@@ -1512,14 +1177,13 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
 
 	/* GFX6: input load from the ESGS ring in memory. */
 	if (swizzle == ~0) {
-		LLVMValueRef values[TGSI_NUM_CHANNELS];
+		LLVMValueRef values[4];
 		unsigned chan;
-		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+		for (chan = 0; chan < 4; chan++) {
 			values[chan] = si_llvm_load_input_gs(abi, input_index, vtx_offset_param,
 							     type, chan);
 		}
-		return ac_build_gather_values(&ctx->ac, values,
-					      TGSI_NUM_CHANNELS);
+		return ac_build_gather_values(&ctx->ac, values, 4);
 	}
 
 	/* Get the vertex offset parameter on GFX6. */
@@ -1540,7 +1204,7 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
 		value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
 					      ctx->i32_0, vtx_offset, soffset,
 					      0, ac_glc, true, false);
-		return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
+		return si_build_gather_64bit(ctx, type, value, value2);
 	}
 	return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
 }
@@ -1570,58 +1234,6 @@ static LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
 	return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
 }
 
-static LLVMValueRef fetch_input_gs(
-	struct lp_build_tgsi_context *bld_base,
-	const struct tgsi_full_src_register *reg,
-	enum tgsi_opcode_type type,
-	unsigned swizzle_in)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct tgsi_shader_info *info = &ctx->shader->selector->info;
-	unsigned swizzle = swizzle_in & 0xffff;
-
-	unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
-	if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
-		return si_get_primitive_id(ctx, swizzle);
-
-	if (!reg->Register.Dimension)
-		return NULL;
-
-	return si_llvm_load_input_gs(&ctx->abi, reg->Register.Index,
-				     reg->Dimension.Index,
-				     tgsi2llvmtype(bld_base, type),
-				     swizzle);
-}
-
-static int lookup_interp_param_index(unsigned interpolate, unsigned location)
-{
-	switch (interpolate) {
-	case TGSI_INTERPOLATE_CONSTANT:
-		return 0;
-
-	case TGSI_INTERPOLATE_LINEAR:
-		if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
-			return SI_PARAM_LINEAR_SAMPLE;
-		else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
-			return SI_PARAM_LINEAR_CENTROID;
-		else
-			return SI_PARAM_LINEAR_CENTER;
-		break;
-	case TGSI_INTERPOLATE_COLOR:
-	case TGSI_INTERPOLATE_PERSPECTIVE:
-		if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
-			return SI_PARAM_PERSP_SAMPLE;
-		else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
-			return SI_PARAM_PERSP_CENTROID;
-		else
-			return SI_PARAM_PERSP_CENTER;
-		break;
-	default:
-		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
-		return -1;
-	}
-}
-
 static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
 				       unsigned attr_index, unsigned chan,
 				       LLVMValueRef prim_mask,
@@ -1654,9 +1266,8 @@ static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
  * @param face			SI_PARAM_FRONT_FACE
  * @param result		the return value (4 components)
  */
-static void interp_fs_input(struct si_shader_context *ctx,
+static void interp_fs_color(struct si_shader_context *ctx,
 			    unsigned input_index,
-			    unsigned semantic_name,
 			    unsigned semantic_index,
 			    unsigned num_interp_inputs,
 			    unsigned colors_read_mask,
@@ -1693,8 +1304,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
 						ctx->i32_1, "");
 	}
 
-	if (semantic_name == TGSI_SEMANTIC_COLOR &&
-	    ctx->shader->key.part.ps.prolog.color_two_side) {
+	if (ctx->shader->key.part.ps.prolog.color_two_side) {
 		LLVMValueRef is_face_positive;
 
 		/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
@@ -1707,7 +1317,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
 		is_face_positive = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
 						 face, ctx->i32_0, "");
 
-		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+		for (chan = 0; chan < 4; chan++) {
 			LLVMValueRef front, back;
 
 			front = si_build_fs_interp(ctx,
@@ -1723,14 +1333,8 @@ static void interp_fs_input(struct si_shader_context *ctx,
 						back,
 						"");
 		}
-	} else if (semantic_name == TGSI_SEMANTIC_FOG) {
-		result[0] = si_build_fs_interp(ctx, input_index,
-					       0, prim_mask, i, j);
-		result[1] =
-		result[2] = LLVMConstReal(ctx->f32, 0.0f);
-		result[3] = LLVMConstReal(ctx->f32, 1.0f);
 	} else {
-		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+		for (chan = 0; chan < 4; chan++) {
 			result[chan] = si_build_fs_interp(ctx,
 							  input_index, chan,
 							  prim_mask, i, j);
@@ -1738,60 +1342,6 @@ static void interp_fs_input(struct si_shader_context *ctx,
 	}
 }
 
-void si_llvm_load_input_fs(
-	struct si_shader_context *ctx,
-	unsigned input_index,
-	LLVMValueRef out[4])
-{
-	struct si_shader *shader = ctx->shader;
-	struct tgsi_shader_info *info = &shader->selector->info;
-	LLVMValueRef main_fn = ctx->main_fn;
-	LLVMValueRef interp_param = NULL;
-	int interp_param_idx;
-	enum tgsi_semantic semantic_name = info->input_semantic_name[input_index];
-	unsigned semantic_index = info->input_semantic_index[input_index];
-	enum tgsi_interpolate_mode interp_mode = info->input_interpolate[input_index];
-	enum tgsi_interpolate_loc interp_loc = info->input_interpolate_loc[input_index];
-
-	/* Get colors from input VGPRs (set by the prolog). */
-	if (semantic_name == TGSI_SEMANTIC_COLOR) {
-		unsigned colors_read = shader->selector->info.colors_read;
-		unsigned mask = colors_read >> (semantic_index * 4);
-		unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
-				  (semantic_index ? util_bitcount(colors_read & 0xf) : 0);
-		LLVMValueRef undef = LLVMGetUndef(ctx->f32);
-
-		out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
-		out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
-		out[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
-		out[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
-		return;
-	}
-
-	interp_param_idx = lookup_interp_param_index(interp_mode, interp_loc);
-	if (interp_param_idx == -1)
-		return;
-	else if (interp_param_idx) {
-		interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
-	}
-
-	interp_fs_input(ctx, input_index, semantic_name,
-			semantic_index, 0, /* this param is unused */
-			shader->selector->info.colors_read, interp_param,
-			ac_get_arg(&ctx->ac, ctx->args.prim_mask),
-			LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
-			&out[0]);
-}
-
-static void declare_input_fs(
-	struct si_shader_context *ctx,
-	unsigned input_index,
-	const struct tgsi_full_declaration *decl,
-	LLVMValueRef out[4])
-{
-	si_llvm_load_input_fs(ctx, input_index, out);
-}
-
 LLVMValueRef si_get_sample_id(struct si_shader_context *ctx)
 {
 	return si_unpack_param(ctx, ctx->args.ancillary, 8, 4);
@@ -1913,7 +1463,7 @@ static LLVMValueRef load_tess_level(struct si_shader_context *ctx,
 	addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
 					  LLVMConstInt(ctx->i32, param, 0));
 
-	return buffer_load(&ctx->bld_base, ctx->f32,
+	return buffer_load(ctx, ctx->f32,
 			   ~0, ctx->tess_offchip_ring, base, addr, true);
 
 }
@@ -1982,211 +1532,6 @@ static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
 		unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
 }
 
-void si_load_system_value(struct si_shader_context *ctx,
-			  unsigned index,
-			  const struct tgsi_full_declaration *decl)
-{
-	LLVMValueRef value = 0;
-
-	assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
-
-	switch (decl->Semantic.Name) {
-	case TGSI_SEMANTIC_INSTANCEID:
-		value = ctx->abi.instance_id;
-		break;
-
-	case TGSI_SEMANTIC_VERTEXID:
-		value = LLVMBuildAdd(ctx->ac.builder,
-				     ctx->abi.vertex_id,
-				     ac_get_arg(&ctx->ac, ctx->args.base_vertex), "");
-		break;
-
-	case TGSI_SEMANTIC_VERTEXID_NOBASE:
-		/* Unused. Clarify the meaning in indexed vs. non-indexed
-		 * draws if this is ever used again. */
-		assert(false);
-		break;
-
-	case TGSI_SEMANTIC_BASEVERTEX:
-		value = get_base_vertex(&ctx->abi);
-		break;
-
-	case TGSI_SEMANTIC_BASEINSTANCE:
-		value = ac_get_arg(&ctx->ac, ctx->args.start_instance);
-		break;
-
-	case TGSI_SEMANTIC_DRAWID:
-		value = ac_get_arg(&ctx->ac, ctx->args.draw_id);
-		break;
-
-	case TGSI_SEMANTIC_INVOCATIONID:
-		if (ctx->type == PIPE_SHADER_TESS_CTRL) {
-			value = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
-		} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
-			if (ctx->screen->info.chip_class >= GFX10) {
-				value = LLVMBuildAnd(ctx->ac.builder,
-						     ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
-						     LLVMConstInt(ctx->i32, 127, 0), "");
-			} else {
-				value = ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id);
-			}
-		} else {
-			assert(!"INVOCATIONID not implemented");
-		}
-		break;
-
-	case TGSI_SEMANTIC_POSITION:
-	{
-		LLVMValueRef pos[4] = {
-			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
-			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
-			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT),
-			ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
-				      LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)),
-		};
-		value = ac_build_gather_values(&ctx->ac, pos, 4);
-		break;
-	}
-
-	case TGSI_SEMANTIC_FACE:
-		value = ac_get_arg(&ctx->ac, ctx->args.front_face);
-		break;
-
-	case TGSI_SEMANTIC_SAMPLEID:
-		value = si_get_sample_id(ctx);
-		break;
-
-	case TGSI_SEMANTIC_SAMPLEPOS: {
-		LLVMValueRef pos[4] = {
-			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
-			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
-			LLVMConstReal(ctx->f32, 0),
-			LLVMConstReal(ctx->f32, 0)
-		};
-		pos[0] = ac_build_fract(&ctx->ac, pos[0], 32);
-		pos[1] = ac_build_fract(&ctx->ac, pos[1], 32);
-		value = ac_build_gather_values(&ctx->ac, pos, 4);
-		break;
-	}
-
-	case TGSI_SEMANTIC_SAMPLEMASK:
-		/* This can only occur with the OpenGL Core profile, which
-		 * doesn't support smoothing.
-		 */
-		value = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE);
-		break;
-
-	case TGSI_SEMANTIC_TESSCOORD:
-		value = si_load_tess_coord(&ctx->abi);
-		break;
-
-	case TGSI_SEMANTIC_VERTICESIN:
-		value = si_load_patch_vertices_in(&ctx->abi);
-		break;
-
-	case TGSI_SEMANTIC_TESSINNER:
-	case TGSI_SEMANTIC_TESSOUTER:
-		value = load_tess_level(ctx, decl->Semantic.Name);
-		break;
-
-	case TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL:
-	case TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL:
-		value = load_tess_level_default(ctx, decl->Semantic.Name);
-		break;
-
-	case TGSI_SEMANTIC_PRIMID:
-		value = si_get_primitive_id(ctx, 0);
-		break;
-
-	case TGSI_SEMANTIC_GRID_SIZE:
-		value = ac_get_arg(&ctx->ac, ctx->args.num_work_groups);
-		break;
-
-	case TGSI_SEMANTIC_BLOCK_SIZE:
-		value = get_block_size(&ctx->abi);
-		break;
-
-	case TGSI_SEMANTIC_BLOCK_ID:
-	{
-		LLVMValueRef values[3];
-
-		for (int i = 0; i < 3; i++) {
-			values[i] = ctx->i32_0;
-			if (ctx->args.workgroup_ids[i].used) {
-				values[i] = ac_get_arg(&ctx->ac, ctx->args.workgroup_ids[i]);
-			}
-		}
-		value = ac_build_gather_values(&ctx->ac, values, 3);
-		break;
-	}
-
-	case TGSI_SEMANTIC_THREAD_ID:
-		value = ac_get_arg(&ctx->ac, ctx->args.local_invocation_ids);
-		break;
-
-	case TGSI_SEMANTIC_HELPER_INVOCATION:
-		value = ac_build_load_helper_invocation(&ctx->ac);
-		break;
-
-	case TGSI_SEMANTIC_SUBGROUP_SIZE:
-		value = LLVMConstInt(ctx->i32, ctx->ac.wave_size, 0);
-		break;
-
-	case TGSI_SEMANTIC_SUBGROUP_INVOCATION:
-		value = ac_get_thread_id(&ctx->ac);
-		break;
-
-	case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
-	{
-		LLVMValueRef id = ac_get_thread_id(&ctx->ac);
-		if (ctx->ac.wave_size == 64)
-			id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
-		value = LLVMBuildShl(ctx->ac.builder,
-				     LLVMConstInt(ctx->ac.iN_wavemask, 1, 0), id, "");
-		if (ctx->ac.wave_size == 32)
-			value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
-		value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
-		break;
-	}
-
-	case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
-	case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
-	case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
-	case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
-	{
-		LLVMValueRef id = ac_get_thread_id(&ctx->ac);
-		if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK ||
-		    decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) {
-			/* All bits set except LSB */
-			value = LLVMConstInt(ctx->ac.iN_wavemask, -2, 0);
-		} else {
-			/* All bits set */
-			value = LLVMConstInt(ctx->ac.iN_wavemask, -1, 0);
-		}
-		if (ctx->ac.wave_size == 64)
-			id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
-		value = LLVMBuildShl(ctx->ac.builder, value, id, "");
-		if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
-		    decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
-			value = LLVMBuildNot(ctx->ac.builder, value, "");
-		if (ctx->ac.wave_size == 32)
-			value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
-		value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
-		break;
-	}
-
-	case TGSI_SEMANTIC_CS_USER_DATA_AMD:
-		value = ac_get_arg(&ctx->ac, ctx->cs_user_data);
-		break;
-
-	default:
-		assert(!"unknown system value");
-		return;
-	}
-
-	ctx->system_values[index] = value;
-}
-
 void si_declare_compute_memory(struct si_shader_context *ctx)
 {
 	struct si_shader_selector *sel = ctx->shader->selector;
@@ -2206,15 +1551,6 @@ void si_declare_compute_memory(struct si_shader_context *ctx)
 	ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
 }
 
-void si_tgsi_declare_compute_memory(struct si_shader_context *ctx,
-				    const struct tgsi_full_declaration *decl)
-{
-	assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
-	assert(decl->Range.First == decl->Range.Last);
-
-	si_declare_compute_memory(ctx);
-}
-
 static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
 {
 	LLVMValueRef ptr =
@@ -2256,15 +1592,6 @@ static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *c
 	return ac_build_gather_values(&ctx->ac, desc_elems, 4);
 }
 
-static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
-{
-	LLVMValueRef list_ptr = ac_get_arg(&ctx->ac,
-					   ctx->const_and_shader_buffers);
-
-	return ac_build_load_to_sgpr(&ctx->ac, list_ptr,
-				     LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
-}
-
 static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
 {
 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
@@ -2299,72 +1626,6 @@ load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
 	return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
 }
 
-static LLVMValueRef fetch_constant(
-	struct lp_build_tgsi_context *bld_base,
-	const struct tgsi_full_src_register *reg,
-	enum tgsi_opcode_type type,
-	unsigned swizzle_in)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct si_shader_selector *sel = ctx->shader->selector;
-	const struct tgsi_ind_register *ireg = &reg->Indirect;
-	unsigned buf, idx;
-	unsigned swizzle = swizzle_in & 0xffff;
-
-	LLVMValueRef addr, bufp;
-
-	if (swizzle_in == LP_CHAN_ALL) {
-		unsigned chan;
-		LLVMValueRef values[4];
-		for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
-			values[chan] = fetch_constant(bld_base, reg, type, chan);
-
-		return ac_build_gather_values(&ctx->ac, values, 4);
-	}
-
-	/* Split 64-bit loads. */
-	if (tgsi_type_is_64bit(type)) {
-		LLVMValueRef lo, hi;
-
-		lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle);
-		hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, (swizzle_in >> 16));
-		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-						lo, hi);
-	}
-
-	idx = reg->Register.Index * 4 + swizzle;
-	if (reg->Register.Indirect) {
-		addr = si_get_indirect_index(ctx, ireg, 16, idx * 4);
-	} else {
-		addr = LLVMConstInt(ctx->i32, idx * 4, 0);
-	}
-
-	/* Fast path when user data SGPRs point to constant buffer 0 directly. */
-	if (sel->info.const_buffers_declared == 1 &&
-	    sel->info.shader_buffers_declared == 0) {
-		LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx);
-		LLVMValueRef result = buffer_load_const(ctx, desc, addr);
-		return bitcast(bld_base, type, result);
-	}
-
-	assert(reg->Register.Dimension);
-	buf = reg->Dimension.Index;
-
-	if (reg->Dimension.Indirect) {
-		LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
-		LLVMValueRef index;
-		index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
-						      reg->Dimension.Index,
-						      ctx->num_const_buffers);
-		index = LLVMBuildAdd(ctx->ac.builder, index,
-				     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
-		bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
-	} else
-		bufp = load_const_buffer_desc(ctx, buf);
-
-	return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr));
-}
-
 /* Initialize arguments for the shader export intrinsic */
 static void si_llvm_init_export_args(struct si_shader_context *ctx,
 				     LLVMValueRef *values,
@@ -2495,11 +1756,8 @@ static void si_llvm_init_export_args(struct si_shader_context *ctx,
 	}
 }
 
-static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
-			  LLVMValueRef alpha)
+static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha)
 {
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
 	if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
 		static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
 			[PIPE_FUNC_LESS] = LLVMRealOLT,
@@ -2522,11 +1780,10 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
 	}
 }
 
-static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
+static LLVMValueRef si_scale_alpha_by_sample_mask(struct si_shader_context *ctx,
 						  LLVMValueRef alpha,
 						  unsigned samplemask_param)
 {
-	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMValueRef coverage;
 
 	/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
@@ -2569,8 +1826,8 @@ static void si_llvm_emit_clipvertex(struct si_shader_context *ctx,
 		args->out[3] = LLVMConstReal(ctx->f32, 0.0f);
 
 		/* Compute dot products of position and user clip plane vectors */
-		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-			for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
+		for (chan = 0; chan < 4; chan++) {
+			for (const_chan = 0; const_chan < 4; const_chan++) {
 				LLVMValueRef addr =
 					LLVMConstInt(ctx->i32, ((reg_index * 4 + chan) * 4 +
 								const_chan) * 4, 0);
@@ -3030,9 +2287,8 @@ void si_llvm_export_vs(struct si_shader_context *ctx,
  * Forward all outputs from the vertex shader to the TES. This is only used
  * for the fixed function TCS.
  */
-static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
+static void si_copy_tcs_inputs(struct si_shader_context *ctx)
 {
-	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMValueRef invocation_id, buffer, buffer_offset;
 	LLVMValueRef lds_vertex_stride, lds_base;
 	uint64_t inputs;
@@ -3059,21 +2315,20 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 		                              invocation_id,
 		                              LLVMConstInt(ctx->i32, i, 0));
 
-		LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr);
+		LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
 
 		ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
 					    buffer_offset, 0, ac_glc);
 	}
 }
 
-static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
+static void si_write_tess_factors(struct si_shader_context *ctx,
 				  LLVMValueRef rel_patch_id,
 				  LLVMValueRef invocation_id,
 				  LLVMValueRef tcs_out_current_patch_data_offset,
 				  LLVMValueRef invoc0_tf_outer[4],
 				  LLVMValueRef invoc0_tf_inner[2])
 {
-	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct si_shader *shader = ctx->shader;
 	unsigned tess_inner_index, tess_outer_index;
 	LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
@@ -3082,7 +2337,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 
 	/* Add a barrier before loading tess factors from LDS. */
 	if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
-		si_llvm_emit_barrier(NULL, bld_base, NULL);
+		si_llvm_emit_barrier(ctx);
 
 	/* Do this only for invocation 0, because the tess levels are per-patch,
 	 * not per-vertex.
@@ -3144,11 +2399,11 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 
 		for (i = 0; i < outer_comps; i++) {
 			outer[i] = out[i] =
-				lshs_lds_load(bld_base, ctx->ac.i32, i, lds_outer);
+				lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
 		}
 		for (i = 0; i < inner_comps; i++) {
 			inner[i] = out[outer_comps+i] =
-				lshs_lds_load(bld_base, ctx->ac.i32, i, lds_inner);
+				lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
 		}
 	}
 
@@ -3279,11 +2534,10 @@ static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi,
 				      LLVMValueRef *addrs)
 {
 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
 	LLVMBuilderRef builder = ctx->ac.builder;
 	LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
 
-	si_copy_tcs_inputs(bld_base);
+	si_copy_tcs_inputs(ctx);
 
 	rel_patch_id = get_rel_patch_id(ctx);
 	invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
@@ -3595,12 +2849,6 @@ static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi,
 	emit_gs_epilogue(ctx);
 }
 
-static void si_tgsi_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_gs_epilogue(ctx);
-}
-
 static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
 				     unsigned max_outputs,
 				     LLVMValueRef *addrs)
@@ -3677,24 +2925,15 @@ static void si_llvm_emit_prim_discard_cs_epilogue(struct ac_shader_abi *abi,
 	ctx->return_value = ret;
 }
 
-static void si_tgsi_emit_epilogue(struct lp_build_tgsi_context *bld_base)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	ctx->abi.emit_outputs(&ctx->abi, RADEON_LLVM_MAX_OUTPUTS,
-			      &ctx->outputs[0][0]);
-}
-
 struct si_ps_exports {
 	unsigned num;
 	struct ac_export_args args[10];
 };
 
-static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
+static void si_export_mrt_z(struct si_shader_context *ctx,
 			    LLVMValueRef depth, LLVMValueRef stencil,
 			    LLVMValueRef samplemask, struct si_ps_exports *exp)
 {
-	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct ac_export_args args;
 
 	ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
@@ -3702,12 +2941,11 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
 	memcpy(&exp->args[exp->num++], &args, sizeof(args));
 }
 
-static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
+static void si_export_mrt_color(struct si_shader_context *ctx,
 				LLVMValueRef *color, unsigned index,
 				unsigned samplemask_param,
 				bool is_last, struct si_ps_exports *exp)
 {
-	struct si_shader_context *ctx = si_shader_context(bld_base);
 	int i;
 
 	/* Clamp color */
@@ -3722,11 +2960,11 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
 	/* Alpha test */
 	if (index == 0 &&
 	    ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
-		si_alpha_test(bld_base, color[3]);
+		si_alpha_test(ctx, color[3]);
 
 	/* Line & polygon smoothing */
 	if (ctx->shader->key.part.ps.epilog.poly_line_smoothing)
-		color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3],
+		color[3] = si_scale_alpha_by_sample_mask(ctx, color[3],
 							 samplemask_param);
 
 	/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
@@ -3873,345 +3111,6 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi,
 	ctx->return_value = ret;
 }
 
-static void membar_emit(
-		const struct lp_build_tgsi_action *action,
-		struct lp_build_tgsi_context *bld_base,
-		struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
-	unsigned flags = LLVMConstIntGetZExtValue(src0);
-	unsigned wait_flags = 0;
-
-	if (flags & TGSI_MEMBAR_THREAD_GROUP)
-		wait_flags |= AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
-
-	if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER |
-		     TGSI_MEMBAR_SHADER_BUFFER |
-		     TGSI_MEMBAR_SHADER_IMAGE))
-		wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
-
-	if (flags & TGSI_MEMBAR_SHARED)
-		wait_flags |= AC_WAIT_LGKM;
-
-	ac_build_waitcnt(&ctx->ac, wait_flags);
-}
-
-static void clock_emit(
-		const struct lp_build_tgsi_action *action,
-		struct lp_build_tgsi_context *bld_base,
-		struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef tmp = ac_build_shader_clock(&ctx->ac);
-
-	emit_data->output[0] =
-		LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_0, "");
-	emit_data->output[1] =
-		LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_1, "");
-}
-
-static void si_llvm_emit_ddxy(
-	const struct lp_build_tgsi_action *action,
-	struct lp_build_tgsi_context *bld_base,
-	struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	unsigned opcode = emit_data->info->opcode;
-	LLVMValueRef val;
-	int idx;
-	unsigned mask;
-
-	if (opcode == TGSI_OPCODE_DDX_FINE)
-		mask = AC_TID_MASK_LEFT;
-	else if (opcode == TGSI_OPCODE_DDY_FINE)
-		mask = AC_TID_MASK_TOP;
-	else
-		mask = AC_TID_MASK_TOP_LEFT;
-
-	/* for DDX we want to next X pixel, DDY next Y pixel. */
-	idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
-
-	val = ac_to_integer(&ctx->ac, emit_data->args[0]);
-	val = ac_build_ddxy(&ctx->ac, mask, idx, val);
-	emit_data->output[emit_data->chan] = val;
-}
-
-static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
-				struct lp_build_tgsi_context *bld_base,
-				struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct si_shader *shader = ctx->shader;
-	const struct tgsi_shader_info *info = &shader->selector->info;
-	LLVMValueRef interp_param;
-	const struct tgsi_full_instruction *inst = emit_data->inst;
-	const struct tgsi_full_src_register *input = &inst->Src[0];
-	int input_base, input_array_size;
-	int chan;
-	int i;
-	LLVMValueRef prim_mask = ac_get_arg(&ctx->ac, ctx->args.prim_mask);
-	LLVMValueRef array_idx, offset_x = NULL, offset_y = NULL;
-	int interp_param_idx;
-	unsigned interp;
-	unsigned location;
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
-		/* offset is in second src, first two channels */
-		offset_x = lp_build_emit_fetch(bld_base, emit_data->inst, 1,
-					       TGSI_CHAN_X);
-		offset_y = lp_build_emit_fetch(bld_base, emit_data->inst, 1,
-					       TGSI_CHAN_Y);
-	} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
-		LLVMValueRef sample_position;
-		LLVMValueRef sample_id;
-		LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
-
-		/* fetch sample ID, then fetch its sample position,
-		 * and place into first two channels.
-		 */
-		sample_id = lp_build_emit_fetch(bld_base,
-						emit_data->inst, 1, TGSI_CHAN_X);
-		sample_id = ac_to_integer(&ctx->ac, sample_id);
-
-		/* Section 8.13.2 (Interpolation Functions) of the OpenGL Shading
-		 * Language 4.50 spec says about interpolateAtSample:
-		 *
-		 *    "Returns the value of the input interpolant variable at
-		 *     the location of sample number sample. If multisample
-		 *     buffers are not available, the input variable will be
-		 *     evaluated at the center of the pixel. If sample sample
-		 *     does not exist, the position used to interpolate the
-		 *     input variable is undefined."
-		 *
-		 * This means that sample_id values outside of the valid are
-		 * in fact valid input, and the usual mechanism for loading the
-		 * sample position doesn't work.
-		 */
-		if (ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center) {
-			LLVMValueRef center[4] = {
-				LLVMConstReal(ctx->f32, 0.5),
-				LLVMConstReal(ctx->f32, 0.5),
-				ctx->ac.f32_0,
-				ctx->ac.f32_0,
-			};
-
-			sample_position = ac_build_gather_values(&ctx->ac, center, 4);
-		} else {
-			sample_position = load_sample_position(&ctx->abi, sample_id);
-		}
-
-		offset_x = LLVMBuildExtractElement(ctx->ac.builder, sample_position,
-						   ctx->i32_0, "");
-
-		offset_x = LLVMBuildFSub(ctx->ac.builder, offset_x, halfval, "");
-		offset_y = LLVMBuildExtractElement(ctx->ac.builder, sample_position,
-						   ctx->i32_1, "");
-		offset_y = LLVMBuildFSub(ctx->ac.builder, offset_y, halfval, "");
-	}
-
-	assert(input->Register.File == TGSI_FILE_INPUT);
-
-	if (input->Register.Indirect) {
-		unsigned array_id = input->Indirect.ArrayID;
-
-		if (array_id) {
-			input_base = info->input_array_first[array_id];
-			input_array_size = info->input_array_last[array_id] - input_base + 1;
-		} else {
-			input_base = inst->Src[0].Register.Index;
-			input_array_size = info->num_inputs - input_base;
-		}
-
-		array_idx = si_get_indirect_index(ctx, &input->Indirect,
-						  1, input->Register.Index - input_base);
-	} else {
-		input_base = inst->Src[0].Register.Index;
-		input_array_size = 1;
-		array_idx = ctx->i32_0;
-	}
-
-	interp = shader->selector->info.input_interpolate[input_base];
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
-	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
-		location = TGSI_INTERPOLATE_LOC_CENTER;
-	else
-		location = TGSI_INTERPOLATE_LOC_CENTROID;
-
-	interp_param_idx = lookup_interp_param_index(interp, location);
-	if (interp_param_idx == -1)
-		return;
-	else if (interp_param_idx)
-		interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
-	else
-		interp_param = NULL;
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
-	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
-		LLVMValueRef ij_out[2];
-		LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
-
-		/*
-		 * take the I then J parameters, and the DDX/Y for it, and
-		 * calculate the IJ inputs for the interpolator.
-		 * temp1 = ddx * offset/sample.x + I;
-		 * interp_param.I = ddy * offset/sample.y + temp1;
-		 * temp1 = ddx * offset/sample.x + J;
-		 * interp_param.J = ddy * offset/sample.y + temp1;
-		 */
-		for (i = 0; i < 2; i++) {
-			LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, 0);
-			LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, 0);
-			LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
-								      ddxy_out, ix_ll, "");
-			LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
-								      ddxy_out, iy_ll, "");
-			LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
-									 interp_param, ix_ll, "");
-			LLVMValueRef temp;
-
-			interp_el = ac_to_float(&ctx->ac, interp_el);
-
-			temp = ac_build_fmad(&ctx->ac, ddx_el, offset_x, interp_el);
-			ij_out[i] = ac_build_fmad(&ctx->ac, ddy_el, offset_y, temp);
-		}
-		interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
-	}
-
-	if (interp_param)
-		interp_param = ac_to_float(&ctx->ac, interp_param);
-
-	for (chan = 0; chan < 4; chan++) {
-		LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->f32, input_array_size));
-		unsigned schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
-
-		for (unsigned idx = 0; idx < input_array_size; ++idx) {
-			LLVMValueRef v, i = NULL, j = NULL;
-
-			if (interp_param) {
-				i = LLVMBuildExtractElement(
-					ctx->ac.builder, interp_param, ctx->i32_0, "");
-				j = LLVMBuildExtractElement(
-					ctx->ac.builder, interp_param, ctx->i32_1, "");
-			}
-			v = si_build_fs_interp(ctx, input_base + idx, schan,
-					       prim_mask, i, j);
-
-			gather = LLVMBuildInsertElement(ctx->ac.builder,
-				gather, v, LLVMConstInt(ctx->i32, idx, false), "");
-		}
-
-		emit_data->output[chan] = LLVMBuildExtractElement(
-			ctx->ac.builder, gather, array_idx, "");
-	}
-}
-
-static void vote_all_emit(
-	const struct lp_build_tgsi_action *action,
-	struct lp_build_tgsi_context *bld_base,
-	struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-        LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, emit_data->args[0]);
-	emit_data->output[emit_data->chan] =
-		LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
-}
-
-static void vote_any_emit(
-	const struct lp_build_tgsi_action *action,
-	struct lp_build_tgsi_context *bld_base,
-	struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-        LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, emit_data->args[0]);
-	emit_data->output[emit_data->chan] =
-		LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
-}
-
-static void vote_eq_emit(
-	const struct lp_build_tgsi_action *action,
-	struct lp_build_tgsi_context *bld_base,
-	struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-        LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, emit_data->args[0]);
-	emit_data->output[emit_data->chan] =
-		LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
-}
-
-static void ballot_emit(
-	const struct lp_build_tgsi_action *action,
-	struct lp_build_tgsi_context *bld_base,
-	struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef tmp;
-
-	tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
-	tmp = ac_build_ballot(&ctx->ac, tmp);
-
-	emit_data->output[0] = LLVMBuildTrunc(builder, tmp, ctx->i32, "");
-
-	if (ctx->ac.wave_size == 32) {
-		emit_data->output[1] = ctx->i32_0;
-	} else {
-		tmp = LLVMBuildLShr(builder, tmp, LLVMConstInt(ctx->i64, 32, 0), "");
-		emit_data->output[1] = LLVMBuildTrunc(builder, tmp, ctx->i32, "");
-	}
-}
-
-static void read_lane_emit(
-	const struct lp_build_tgsi_action *action,
-	struct lp_build_tgsi_context *bld_base,
-	struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_READ_INVOC) {
-		emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
-							 0, emit_data->src_chan);
-
-		/* Always read the source invocation (= lane) from the X channel. */
-		emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
-							 1, TGSI_CHAN_X);
-		emit_data->arg_count = 2;
-	}
-
-	/* We currently have no other way to prevent LLVM from lifting the icmp
-	 * calls to a dominating basic block.
-	 */
-	ac_build_optimization_barrier(&ctx->ac, &emit_data->args[0]);
-
-	for (unsigned i = 0; i < emit_data->arg_count; ++i)
-		emit_data->args[i] = ac_to_integer(&ctx->ac, emit_data->args[i]);
-
-	emit_data->output[emit_data->chan] =
-		ac_build_intrinsic(&ctx->ac, action->intr_name,
-				   ctx->i32, emit_data->args, emit_data->arg_count,
-				   AC_FUNC_ATTR_READNONE |
-				   AC_FUNC_ATTR_CONVERGENT);
-}
-
-static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
-				       struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
-	LLVMValueRef imm;
-	unsigned stream;
-
-	assert(src0.File == TGSI_FILE_IMMEDIATE);
-
-	imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX];
-	stream = LLVMConstIntGetZExtValue(imm) & 0x3;
-	return stream;
-}
-
 /* Emit one vertex from the geometry shader */
 static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
 				unsigned stream,
@@ -4296,18 +3195,6 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
 		ac_build_endif(&ctx->ac, 6505);
 }
 
-/* Emit one vertex from the geometry shader */
-static void si_tgsi_emit_vertex(
-	const struct lp_build_tgsi_action *action,
-	struct lp_build_tgsi_context *bld_base,
-	struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	unsigned stream = si_llvm_get_stream(bld_base, emit_data);
-
-	si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]);
-}
-
 /* Cut one primitive from the geometry shader */
 static void si_llvm_emit_primitive(struct ac_shader_abi *abi,
 				   unsigned stream)
@@ -4324,23 +3211,8 @@ static void si_llvm_emit_primitive(struct ac_shader_abi *abi,
 			 si_get_gs_wave_id(ctx));
 }
 
-/* Cut one primitive from the geometry shader */
-static void si_tgsi_emit_primitive(
-	const struct lp_build_tgsi_action *action,
-	struct lp_build_tgsi_context *bld_base,
-	struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	si_llvm_emit_primitive(&ctx->abi, si_llvm_get_stream(bld_base, emit_data));
-}
-
-static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
-				 struct lp_build_tgsi_context *bld_base,
-				 struct lp_build_emit_data *emit_data)
+static void si_llvm_emit_barrier(struct si_shader_context *ctx)
 {
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
 	/* GFX6 only (thanks to a hw bug workaround):
 	 * The real barrier instruction isn’t needed, because an entire patch
 	 * always fits into a single wave.
@@ -5654,9 +4526,9 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 	shader->selector = gs_selector;
 	shader->is_gs_copy_shader = true;
 
-	si_init_shader_ctx(&ctx, sscreen, compiler,
-			   si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false),
-			   false);
+	si_llvm_context_init(&ctx, sscreen, compiler,
+			     si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false),
+			     64);
 	ctx.shader = shader;
 	ctx.type = PIPE_SHADER_VERTEX;
 
@@ -5917,47 +4789,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
 	}
 }
 
-static void si_init_shader_ctx(struct si_shader_context *ctx,
-			       struct si_screen *sscreen,
-			       struct ac_llvm_compiler *compiler,
-			       unsigned wave_size,
-			       bool nir)
-{
-	struct lp_build_tgsi_context *bld_base;
-
-	si_llvm_context_init(ctx, sscreen, compiler, wave_size,
-			     nir ? 64 : wave_size);
-
-	bld_base = &ctx->bld_base;
-	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
-
-	bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID].emit = build_interp_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE].emit = build_interp_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET].emit = build_interp_intrinsic;
-
-	bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
-
-	bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit;
-
-	bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
-	bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
-	bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
-	bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
-
-	bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
-	bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
-	bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
-	bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
-	bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = "llvm.amdgcn.readfirstlane";
-	bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit;
-	bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = "llvm.amdgcn.readlane";
-	bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
-
-	bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex;
-	bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_tgsi_emit_primitive;
-	bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
-}
-
 static void si_optimize_vs_outputs(struct si_shader_context *ctx)
 {
 	struct si_shader *shader = ctx->shader;
@@ -6014,17 +4845,34 @@ LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
 			     si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), "");
 }
 
+static void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
+{
+	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+	LLVMBuilderRef builder = ctx->ac.builder;
+
+	if (ctx->shader->selector->force_correct_derivs_after_kill) {
+		/* Kill immediately while maintaining WQM. */
+		ac_build_kill_if_false(&ctx->ac,
+				       ac_build_wqm_vote(&ctx->ac, visible));
+
+		LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
+		mask = LLVMBuildAnd(builder, mask, visible, "");
+		LLVMBuildStore(builder, mask, ctx->postponed_kill);
+		return;
+	}
+
+	ac_build_kill_if_false(&ctx->ac, visible);
+}
+
 static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 				 struct nir_shader *nir, bool free_nir)
 {
 	struct si_shader *shader = ctx->shader;
 	struct si_shader_selector *sel = shader->selector;
-	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
 
 	// TODO clean all this up!
 	switch (ctx->type) {
 	case PIPE_SHADER_VERTEX:
-		ctx->load_input = declare_input_vs;
 		if (shader->key.as_ls)
 			ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue;
 		else if (shader->key.as_es)
@@ -6035,22 +4883,16 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 			ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
 		else
 			ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
-		bld_base->emit_epilogue = si_tgsi_emit_epilogue;
 		ctx->abi.load_base_vertex = get_base_vertex;
 		break;
 	case PIPE_SHADER_TESS_CTRL:
-		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
 		ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
 		ctx->abi.load_tess_level = si_load_tess_level;
-		bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
-		bld_base->emit_store = store_output_tcs;
 		ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
 		ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
 		ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
-		bld_base->emit_epilogue = si_tgsi_emit_epilogue;
 		break;
 	case PIPE_SHADER_TESS_EVAL:
-		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
 		ctx->abi.load_tess_varyings = si_nir_load_input_tes;
 		ctx->abi.load_tess_coord = si_load_tess_coord;
 		ctx->abi.load_tess_level = si_load_tess_level;
@@ -6061,20 +4903,15 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 			ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
 		else
 			ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
-		bld_base->emit_epilogue = si_tgsi_emit_epilogue;
 		break;
 	case PIPE_SHADER_GEOMETRY:
-		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
 		ctx->abi.load_inputs = si_nir_load_input_gs;
 		ctx->abi.emit_vertex = si_llvm_emit_vertex;
 		ctx->abi.emit_primitive = si_llvm_emit_primitive;
 		ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
-		bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
 		break;
 	case PIPE_SHADER_FRAGMENT:
-		ctx->load_input = declare_input_fs;
 		ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
-		bld_base->emit_epilogue = si_tgsi_emit_epilogue;
 		ctx->abi.load_sample_position = load_sample_position;
 		ctx->abi.load_sample_mask_in = load_sample_mask_in;
 		ctx->abi.emit_fbfetch = si_nir_emit_fbfetch;
@@ -6229,7 +5066,7 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 				 * and contains a barrier, it will wait there and then
 				 * reach s_endpgm.
 				 */
-				si_llvm_emit_barrier(NULL, bld_base, NULL);
+				si_llvm_emit_barrier(ctx);
 			}
 		}
 	}
@@ -6241,19 +5078,12 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 			       ctx->postponed_kill);
 	}
 
-	if (sel->tokens) {
-		if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
-			fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
-			return false;
-		}
-	} else {
-		bool success = si_nir_build_llvm(ctx, nir);
-		if (free_nir)
-			ralloc_free(nir);
-		if (!success) {
-			fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
-			return false;
-		}
+	bool success = si_nir_build_llvm(ctx, nir);
+	if (free_nir)
+		ralloc_free(nir);
+	if (!success) {
+		fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
+		return false;
 	}
 
 	si_llvm_build_ret(ctx, ctx->return_value);
@@ -6899,10 +5729,10 @@ static struct nir_shader *get_nir_shader(struct si_shader_selector *sel,
 	return NULL;
 }
 
-int si_compile_tgsi_shader(struct si_screen *sscreen,
-			   struct ac_llvm_compiler *compiler,
-			   struct si_shader *shader,
-			   struct pipe_debug_callback *debug)
+int si_compile_shader(struct si_screen *sscreen,
+		      struct ac_llvm_compiler *compiler,
+		      struct si_shader *shader,
+		      struct pipe_debug_callback *debug)
 {
 	struct si_shader_selector *sel = shader->selector;
 	struct si_shader_context ctx;
@@ -6914,16 +5744,12 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	 * conversion fails. */
 	if (si_can_dump_shader(sscreen, sel->type) &&
 	    !(sscreen->debug_flags & DBG(NO_TGSI))) {
-		if (sel->tokens)
-			tgsi_dump(sel->tokens, 0);
-		else
-			nir_print_shader(nir, stderr);
+		nir_print_shader(nir, stderr);
 		si_dump_streamout(&sel->so);
 	}
 
-	si_init_shader_ctx(&ctx, sscreen, compiler, si_get_shader_wave_size(shader),
-			   nir != NULL);
-	si_llvm_context_set_ir(&ctx, shader, nir);
+	si_llvm_context_init(&ctx, sscreen, compiler, si_get_shader_wave_size(shader), 64);
+	si_llvm_context_set_ir(&ctx, shader);
 
 	memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
 	       sizeof(shader->info.vs_output_param_offset));
@@ -6982,7 +5808,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 			shader_ls.key.mono = shader->key.mono;
 			shader_ls.key.opt = shader->key.opt;
 			shader_ls.is_monolithic = true;
-			si_llvm_context_set_ir(&ctx, &shader_ls, nir);
+			si_llvm_context_set_ir(&ctx, &shader_ls);
 
 			if (!si_compile_tgsi_main(&ctx, nir, free_nir)) {
 				si_llvm_dispose(&ctx);
@@ -7050,7 +5876,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 			shader_es.key.mono = shader->key.mono;
 			shader_es.key.opt = shader->key.opt;
 			shader_es.is_monolithic = true;
-			si_llvm_context_set_ir(&ctx, &shader_es, nir);
+			si_llvm_context_set_ir(&ctx, &shader_es);
 
 			if (!si_compile_tgsi_main(&ctx, nir, free_nir)) {
 				si_llvm_dispose(&ctx);
@@ -7269,10 +6095,10 @@ si_get_shader_part(struct si_screen *sscreen,
 	}
 
 	struct si_shader_context ctx;
-	si_init_shader_ctx(&ctx, sscreen, compiler,
-			   si_get_wave_size(sscreen, type, shader.key.as_ngg,
-					    shader.key.as_es),
-			   false);
+	si_llvm_context_init(&ctx, sscreen, compiler,
+			     si_get_wave_size(sscreen, type, shader.key.as_ngg,
+					      shader.key.as_es),
+			     64);
 	ctx.shader = &shader;
 	ctx.type = type;
 
@@ -7540,8 +6366,6 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen,
 static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 					 union si_shader_part_key *key)
 {
-	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
 	memset(&ctx->args, 0, sizeof(ctx->args));
 
 	if (ctx->screen->info.chip_class >= GFX9) {
@@ -7608,7 +6432,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 	for (unsigned i = 0; i < 6; i++)
 		invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
 
-	si_write_tess_factors(bld_base,
+	si_write_tess_factors(ctx,
 			      ac_get_arg(&ctx->ac, rel_patch_id),
 			      ac_get_arg(&ctx->ac, invocation_id),
 			      ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
@@ -7914,9 +6738,8 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 			face = ac_to_integer(&ctx->ac, face);
 		}
 
-		interp_fs_input(ctx,
-				key->ps_prolog.color_attr_index[i],
-				TGSI_SEMANTIC_COLOR, i,
+		interp_fs_color(ctx,
+				key->ps_prolog.color_attr_index[i], i,
 				key->ps_prolog.num_interp_inputs,
 				key->ps_prolog.colors_read, interp_ij,
 				prim_mask, face, color);
@@ -7990,7 +6813,6 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key)
 {
-	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
 	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
 	int i;
 	struct si_ps_exports exp = {};
@@ -8060,7 +6882,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 		for (i = 0; i < 4; i++)
 			color[i] = LLVMGetParam(ctx->main_fn, vgpr++);
 
-		si_export_mrt_color(bld_base, color, mrt,
+		si_export_mrt_color(ctx, color, mrt,
 				    ctx->args.arg_count - 1,
 				    mrt == last_color_export, &exp);
 	}
@@ -8074,7 +6896,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 		samplemask = LLVMGetParam(ctx->main_fn, vgpr++);
 
 	if (depth || stencil || samplemask)
-		si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);
+		si_export_mrt_z(ctx, depth, stencil, samplemask, &exp);
 	else if (last_color_export == -1)
 		ac_build_export_null(&ctx->ac);
 
@@ -8240,7 +7062,7 @@ bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compil
 		/* Monolithic shader (compiled as a whole, has many variants,
 		 * may take a long time to compile).
 		 */
-		r = si_compile_tgsi_shader(sscreen, compiler, shader, debug);
+		r = si_compile_shader(sscreen, compiler, shader, debug);
 		if (r)
 			return false;
 	} else {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index d9a199bfa3c..30dbe1c6a6e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -326,7 +326,6 @@ struct si_shader_selector {
 
 	struct si_shader	*gs_copy_shader;
 
-	struct tgsi_token       *tokens;
 	struct nir_shader       *nir;
 	void			*nir_binary;
 	unsigned		nir_size;
@@ -730,10 +729,10 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 			   struct ac_llvm_compiler *compiler,
 			   struct si_shader_selector *gs_selector,
 			   struct pipe_debug_callback *debug);
-int si_compile_tgsi_shader(struct si_screen *sscreen,
-			   struct ac_llvm_compiler *compiler,
-			   struct si_shader *shader,
-			   struct pipe_debug_callback *debug);
+int si_compile_shader(struct si_screen *sscreen,
+		      struct ac_llvm_compiler *compiler,
+		      struct si_shader *shader,
+		      struct pipe_debug_callback *debug);
 bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
 		     struct si_shader *shader,
 		     struct pipe_debug_callback *debug);
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index a9b40f41b4c..1ec74a84a69 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -26,10 +26,6 @@
 #define SI_SHADER_PRIVATE_H
 
 #include "si_shader.h"
-#include "gallivm/lp_bld_flow.h"
-#include "gallivm/lp_bld_init.h"
-#include "gallivm/lp_bld_tgsi.h"
-#include "tgsi/tgsi_parse.h"
 #include "ac_shader_abi.h"
 
 #include <llvm-c/Core.h>
@@ -37,12 +33,7 @@
 
 struct pipe_debug_callback;
 
-#define RADEON_LLVM_MAX_INPUT_SLOTS 32
 #define RADEON_LLVM_MAX_INPUTS 32 * 4
-#define RADEON_LLVM_MAX_OUTPUTS 32 * 4
-
-#define RADEON_LLVM_MAX_SYSTEM_VALUES 11
-#define RADEON_LLVM_MAX_ADDRS 16
 
 struct si_shader_output_values {
 	LLVMValueRef values[4];
@@ -52,8 +43,6 @@ struct si_shader_output_values {
 };
 
 struct si_shader_context {
-	struct lp_build_tgsi_context bld_base;
-	struct gallivm_state gallivm;
 	struct ac_llvm_context ac;
 	struct si_shader *shader;
 	struct si_screen *screen;
@@ -69,42 +58,11 @@ struct si_shader_context {
 	struct ac_shader_args args;
 	struct ac_shader_abi abi;
 
-	/** This function is responsible for initilizing the inputs array and will be
-	  * called once for each input declared in the TGSI shader.
-	  */
-	void (*load_input)(struct si_shader_context *,
-			   unsigned input_index,
-			   const struct tgsi_full_declaration *decl,
-			   LLVMValueRef out[4]);
-
-	/** This array contains the input values for the shader.  Typically these
-	  * values will be in the form of a target intrinsic that will inform the
-	  * backend how to load the actual inputs to the shader.
-	  */
-	struct tgsi_full_declaration input_decls[RADEON_LLVM_MAX_INPUT_SLOTS];
 	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
-	LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
-	LLVMValueRef addrs[RADEON_LLVM_MAX_ADDRS][TGSI_NUM_CHANNELS];
-
-	/** This pointer is used to contain the temporary values.
-	  * The amount of temporary used in tgsi can't be bound to a max value and
-	  * thus we must allocate this array at runtime.
-	  */
-	LLVMValueRef *temps;
-	unsigned temps_count;
-	LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
-
-	LLVMValueRef *imms;
-	unsigned imms_num;
 
 	LLVMBasicBlockRef merged_wrap_if_entry_block;
 	int merged_wrap_if_label;
 
-	struct tgsi_array_info *temp_arrays;
-	LLVMValueRef *temp_array_allocas;
-
-	LLVMValueRef undef_alloca;
-
 	LLVMValueRef main_fn;
 	LLVMTypeRef return_type;
 
@@ -233,12 +191,6 @@ struct si_shader_context {
 	LLVMValueRef i1true;
 };
 
-static inline struct si_shader_context *
-si_shader_context(struct lp_build_tgsi_context *bld_base)
-{
-	return (struct si_shader_context*)bld_base;
-}
-
 static inline struct si_shader_context *
 si_shader_context_from_abi(struct ac_shader_abi *abi)
 {
@@ -255,12 +207,6 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
 			 struct pipe_debug_callback *debug,
 			 bool less_optimized, unsigned wave_size);
 
-LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
-			  enum tgsi_opcode_type type);
-
-LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
-		     enum tgsi_opcode_type type, LLVMValueRef value);
-
 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
 				 LLVMValueRef index,
 				 unsigned num);
@@ -271,8 +217,7 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 			  unsigned wave_size,
 			  unsigned ballot_mask_bits);
 void si_llvm_context_set_ir(struct si_shader_context *ctx,
-			    struct si_shader *shader,
-			    struct nir_shader *nir);
+			    struct si_shader *shader);
 
 void si_llvm_create_func(struct si_shader_context *ctx,
 			 const char *name,
@@ -282,18 +227,6 @@ void si_llvm_dispose(struct si_shader_context *ctx);
 
 void si_llvm_optimize_module(struct si_shader_context *ctx);
 
-LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
-				      LLVMTypeRef type,
-				      LLVMValueRef ptr,
-				      LLVMValueRef ptr2);
-
-LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
-				const struct tgsi_full_src_register *reg,
-				enum tgsi_opcode_type type,
-				unsigned swizzle);
-
-void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible);
-
 LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
 				   LLVMTypeRef type,
 				   LLVMValueRef vertex_index,
@@ -306,34 +239,10 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
 				   bool is_patch,
 				   bool is_compact,
 				   bool load_input);
-
-LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
-				   unsigned input_index,
-				   unsigned vtx_offset_param,
-				   LLVMTypeRef type,
-				   unsigned swizzle);
-
 LLVMValueRef si_nir_lookup_interp_param(struct ac_shader_abi *abi,
 					enum glsl_interp_mode interp,
 					unsigned location);
-
-void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
-			const struct tgsi_full_instruction *inst,
-			const struct tgsi_opcode_info *info,
-			unsigned index,
-			LLVMValueRef dst[4]);
-
-LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
-				   const struct tgsi_ind_register *ind,
-				   unsigned addr_mul, int rel_index);
-LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
-					   const struct tgsi_ind_register *ind,
-					   int rel_index, unsigned num);
 LLVMValueRef si_get_sample_id(struct si_shader_context *ctx);
-
-void si_shader_context_init_alu(struct si_shader_context *ctx);
-void si_shader_context_init_mem(struct si_shader_context *ctx);
-
 LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
 				  LLVMValueRef list, LLVMValueRef index,
 				  enum ac_descriptor_type type);
@@ -342,14 +251,7 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
 				enum ac_descriptor_type desc_type,
 				bool uses_store, bool bindless);
 LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi);
-
-void si_load_system_value(struct si_shader_context *ctx,
-			  unsigned index,
-			  const struct tgsi_full_declaration *decl);
 void si_declare_compute_memory(struct si_shader_context *ctx);
-void si_tgsi_declare_compute_memory(struct si_shader_context *ctx,
-				    const struct tgsi_full_declaration *decl);
-
 LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
 				 unsigned swizzle);
 void si_llvm_export_vs(struct si_shader_context *ctx,
@@ -365,10 +267,6 @@ void si_llvm_load_input_vs(
 	struct si_shader_context *ctx,
 	unsigned input_index,
 	LLVMValueRef out[4]);
-void si_llvm_load_input_fs(
-	struct si_shader_context *ctx,
-	unsigned input_index,
-	LLVMValueRef out[4]);
 
 bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
new file mode 100644
index 00000000000..64ceaf7ed34
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_shader_internal.h"
+#include "si_pipe.h"
+#include "ac_llvm_util.h"
+#include "util/u_memory.h"
+
+struct si_llvm_diagnostics {
+	struct pipe_debug_callback *debug;
+	unsigned retval;
+};
+
+static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
+{
+	struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
+	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
+	const char *severity_str = NULL;
+
+	switch (severity) {
+	case LLVMDSError:
+		severity_str = "error";
+		break;
+	case LLVMDSWarning:
+		severity_str = "warning";
+		break;
+	case LLVMDSRemark:
+	case LLVMDSNote:
+	default:
+		return;
+	}
+
+	char *description = LLVMGetDiagInfoDescription(di);
+
+	pipe_debug_message(diag->debug, SHADER_INFO,
+			   "LLVM diagnostic (%s): %s", severity_str, description);
+
+	if (severity == LLVMDSError) {
+		diag->retval = 1;
+		fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
+	}
+
+	LLVMDisposeMessage(description);
+}
+
+/**
+ * Compile an LLVM module to machine code, stored as an ELF image in
+ * \p binary. LLVM errors and warnings are forwarded to \p debug.
+ * @returns 0 for success, 1 for failure
+ */
+unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
+			 struct ac_llvm_compiler *compiler,
+			 struct pipe_debug_callback *debug,
+			 bool less_optimized, unsigned wave_size)
+{
+	struct ac_compiler_passes *passes = compiler->passes;
+
+	if (wave_size == 32)
+		passes = compiler->passes_wave32;
+	else if (less_optimized && compiler->low_opt_passes)
+		passes = compiler->low_opt_passes;
+
+	struct si_llvm_diagnostics diag;
+	LLVMContextRef llvm_ctx;
+
+	diag.debug = debug;
+	diag.retval = 0;
+
+	/* Setup Diagnostic Handler*/
+	llvm_ctx = LLVMGetModuleContext(M);
+
+	LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
+
+	/* Compile IR. */
+	if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer,
+				      &binary->elf_size))
+		diag.retval = 1;
+
+	if (diag.retval != 0)
+		pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
+	return diag.retval;
+}
+
+void si_shader_binary_clean(struct si_shader_binary *binary)
+{
+	free((void *)binary->elf_buffer);
+	binary->elf_buffer = NULL;
+
+	free(binary->llvm_ir_string);
+	binary->llvm_ir_string = NULL;
+}
+
+void si_llvm_context_init(struct si_shader_context *ctx,
+			  struct si_screen *sscreen,
+			  struct ac_llvm_compiler *compiler,
+			  unsigned wave_size,
+			  unsigned ballot_mask_bits)
+{
+	/* Zero the whole context, then let ac_llvm_context_init() set up
+	 * ctx->ac for the screen's chip class and family with the requested
+	 * wave size and ballot mask width. The types and constants cached
+	 * below are created from ctx->ac.context.
+	 */
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->screen = sscreen;
+	ctx->compiler = compiler;
+
+	ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
+			     sscreen->info.family,
+			     AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
+			     wave_size, ballot_mask_bits);
+
+	ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
+	ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
+	ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
+	ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
+	ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
+	ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
+	ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
+	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
+	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
+	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+
+	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
+	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
+	ctx->i1false = LLVMConstInt(ctx->i1, 0, 0);
+	ctx->i1true = LLVMConstInt(ctx->i1, 1, 0);
+}
+
+/* Bind the context to the given shader and cache the resource counts from
+ * its selector's info. Can be called repeatedly to change the shader. */
+void si_llvm_context_set_ir(struct si_shader_context *ctx,
+			    struct si_shader *shader)
+{
+	struct si_shader_selector *sel = shader->selector;
+	const struct tgsi_shader_info *info = &sel->info;
+
+	ctx->shader = shader;
+	ctx->type = sel->type;
+
+	ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
+	ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
+
+	ctx->num_samplers = util_last_bit(info->samplers_declared);
+	ctx->num_images = util_last_bit(info->images_declared);
+}
+
+void si_llvm_create_func(struct si_shader_context *ctx,
+			 const char *name,
+			 LLVMTypeRef *return_types, unsigned num_return_elems)
+{
+	LLVMTypeRef ret_type;
+	enum ac_llvm_calling_convention call_conv;
+	enum pipe_shader_type real_shader_type;
+
+	if (num_return_elems)
+		ret_type = LLVMStructTypeInContext(ctx->ac.context,
+						   return_types,
+						   num_return_elems, true);
+	else
+		ret_type = ctx->voidt;
+
+	real_shader_type = ctx->type;
+
+	/* On GFX9+, LS is merged into HS (TCS), and ES/NGG is built as GS. */
+	if (ctx->screen->info.chip_class >= GFX9) {
+		if (ctx->shader->key.as_ls)
+			real_shader_type = PIPE_SHADER_TESS_CTRL;
+		else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
+			real_shader_type = PIPE_SHADER_GEOMETRY;
+	}
+
+	switch (real_shader_type) {
+	case PIPE_SHADER_VERTEX:
+	case PIPE_SHADER_TESS_EVAL:
+		call_conv = AC_LLVM_AMDGPU_VS;
+		break;
+	case PIPE_SHADER_TESS_CTRL:
+		call_conv = AC_LLVM_AMDGPU_HS;
+		break;
+	case PIPE_SHADER_GEOMETRY:
+		call_conv = AC_LLVM_AMDGPU_GS;
+		break;
+	case PIPE_SHADER_FRAGMENT:
+		call_conv = AC_LLVM_AMDGPU_PS;
+		break;
+	case PIPE_SHADER_COMPUTE:
+		call_conv = AC_LLVM_AMDGPU_CS;
+		break;
+	default:
+		unreachable("Unhandled shader type");
+	}
+
+	/* Build the main function and remember its return type. */
+	ctx->return_type = ret_type;
+	ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name,
+				     ret_type, ctx->ac.module);
+}
+
+void si_llvm_optimize_module(struct si_shader_context *ctx)
+{
+	/* Optionally dump the LLVM IR before any optimization passes. */
+	if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
+	    si_can_dump_shader(ctx->screen, ctx->type))
+		LLVMDumpModule(ctx->ac.module);
+
+	/* Run the compiler's passes; the builder is disposed here as well. */
+	LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
+	LLVMDisposeBuilder(ctx->ac.builder);
+}
+
+void si_llvm_dispose(struct si_shader_context *ctx)
+{
+	LLVMDisposeModule(ctx->ac.module);
+	LLVMContextDispose(ctx->ac.context);
+	ac_llvm_context_dispose(&ctx->ac);
+}
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_build.c b/src/gallium/drivers/radeonsi/si_shader_llvm_build.c
new file mode 100644
index 00000000000..e3625214258
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_build.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <llvm/Config/llvm-config.h>
+
+#include "si_shader_internal.h"
+#include "si_pipe.h"
+#include "sid.h"
+#include "ac_llvm_util.h"
+
+/**
+ * Return a value that is equal to the given i32 \p index if it lies in [0,num)
+ * or an undefined value in the same interval otherwise.
+ */
+LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
+				 LLVMValueRef index,
+				 unsigned num)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
+	LLVMValueRef cc;
+
+	if (util_is_power_of_two_or_zero(num)) {
+		index = LLVMBuildAnd(builder, index, c_max, "");
+	} else {
+		/* In theory, this MIN pattern should result in code that is
+		 * as good as the bit-wise AND above.
+		 *
+		 * In practice, LLVM generates worse code (at the time of
+		 * writing), because its value tracking is not strong enough.
+		 */
+		cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
+		index = LLVMBuildSelect(builder, cc, index, c_max, "");
+	}
+
+	return index;
+}
+
+/**
+ * Given a 256-bit resource descriptor, force the DCC enable bit to off.
+ *
+ * At least on Tonga, executing image stores on images with DCC enabled and
+ * non-trivial can eventually lead to lockups. This can occur when an
+ * application binds an image as read-only but then uses a shader that writes
+ * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
+ * program termination) in this case, but it doesn't cost much to be a bit
+ * nicer: disabling DCC in the shader still leads to undefined results but
+ * avoids the lockup.
+ */
+static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
+				  LLVMValueRef rsrc)
+{
+	if (ctx->screen->info.chip_class <= GFX7) {
+		return rsrc;
+	} else {
+		LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
+		LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
+		LLVMValueRef tmp;
+
+		tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
+		tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
+		return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
+	}
+}
+
+/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
+ * adjust "index" to point to FMASK. */
+LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
+				LLVMValueRef list, LLVMValueRef index,
+				enum ac_descriptor_type desc_type,
+				bool uses_store, bool bindless)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef rsrc;
+
+	if (desc_type == AC_DESC_BUFFER) {
+		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
+				      ctx->i32_1);
+		list = LLVMBuildPointerCast(builder, list,
+					    ac_array_in_const32_addr_space(ctx->v4i32), "");
+	} else {
+		assert(desc_type == AC_DESC_IMAGE ||
+		       desc_type == AC_DESC_FMASK);
+	}
+
+	if (bindless)
+		rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
+	else
+		rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
+
+	if (desc_type == AC_DESC_IMAGE && uses_store)
+		rsrc = force_dcc_off(ctx, rsrc);
+	return rsrc;
+}
+
+/**
+ * Load an image view, fmask view, or sampler state descriptor.
+ */
+LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
+				  LLVMValueRef list, LLVMValueRef index,
+				  enum ac_descriptor_type type)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+
+	switch (type) {
+	case AC_DESC_IMAGE:
+		/* The image is at [0:7]. */
+		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
+		break;
+	case AC_DESC_BUFFER:
+		/* The buffer is in [4:7]. */
+		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
+				      ctx->i32_1);
+		list = LLVMBuildPointerCast(builder, list,
+					    ac_array_in_const32_addr_space(ctx->v4i32), "");
+		break;
+	case AC_DESC_FMASK:
+		/* The FMASK is at [8:15]. */
+		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
+				      ctx->i32_1);
+		break;
+	case AC_DESC_SAMPLER:
+		/* The sampler state is at [12:15]. */
+		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
+				      LLVMConstInt(ctx->i32, 3, 0));
+		list = LLVMBuildPointerCast(builder, list,
+					    ac_array_in_const32_addr_space(ctx->v4i32), "");
+		break;
+	case AC_DESC_PLANE_0:
+	case AC_DESC_PLANE_1:
+	case AC_DESC_PLANE_2:
+		/* Only used for the multiplane image support for Vulkan. Should
+		 * never be reached in radeonsi.
+		 */
+		unreachable("Plane descriptor requested in radeonsi.");
+	}
+
+	return ac_build_load_to_sgpr(&ctx->ac, list, index);
+}
+
+LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
+{
+	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+	struct ac_image_args args = {};
+	LLVMValueRef ptr, image, fmask;
+
+	/* Ignore src0, because KHR_blend_func_extended disallows multiple render
+	 * targets.
+	 */
+
+	/* Load the image descriptor. */
+	STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
+	ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
+	ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
+				   ac_array_in_const32_addr_space(ctx->v8i32), "");
+	image = ac_build_load_to_sgpr(&ctx->ac, ptr,
+			LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
+
+	unsigned chan = 0;
+
+	args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
+
+	if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
+		args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
+
+	/* Get the current render target layer index. */
+	if (ctx->shader->key.mono.u.ps.fbfetch_layered)
+		args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
+
+	if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
+		args.coords[chan++] = si_get_sample_id(ctx);
+
+	if (ctx->shader->key.mono.u.ps.fbfetch_msaa &&
+	    !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
+		fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
+			LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
+
+		ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
+					 ctx->shader->key.mono.u.ps.fbfetch_layered);
+	}
+
+	args.opcode = ac_image_load;
+	args.resource = image;
+	args.dmask = 0xf;
+	args.attributes = AC_FUNC_ATTR_READNONE;
+
+	if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
+		args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+			ac_image_2darraymsaa : ac_image_2dmsaa;
+	else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
+		args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+			ac_image_1darray : ac_image_1d;
+	else
+		args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+			ac_image_2darray : ac_image_2d;
+
+	return ac_build_image_opcode(&ctx->ac, &args);
+}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
deleted file mode 100644
index 4be410ec331..00000000000
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ /dev/null
@@ -1,834 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "si_shader_internal.h"
-#include "si_pipe.h"
-#include "ac_llvm_util.h"
-
-void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
-{
-	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-	LLVMBuilderRef builder = ctx->ac.builder;
-
-	if (ctx->shader->selector->force_correct_derivs_after_kill) {
-		/* Kill immediately while maintaining WQM. */
-		ac_build_kill_if_false(&ctx->ac,
-				       ac_build_wqm_vote(&ctx->ac, visible));
-
-		LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
-		mask = LLVMBuildAnd(builder, mask, visible, "");
-		LLVMBuildStore(builder, mask, ctx->postponed_kill);
-		return;
-	}
-
-	ac_build_kill_if_false(&ctx->ac, visible);
-}
-
-static void kil_emit(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef visible;
-
-	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
-		const struct tgsi_full_instruction *inst = emit_data->inst;
-		struct si_shader_context *ctx = si_shader_context(bld_base);
-		LLVMBuilderRef builder = ctx->ac.builder;
-		unsigned i;
-		LLVMValueRef conds[TGSI_NUM_CHANNELS];
-
-		for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
-			LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
-			/* UGE because NaN shouldn't get killed */
-			conds[i] = LLVMBuildFCmp(builder, LLVMRealUGE, value,
-						ctx->ac.f32_0, "");
-		}
-
-		/* And the conditions together */
-		for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
-			conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], "");
-		}
-		visible = conds[0];
-	} else {
-		assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
-		visible = ctx->i1false;
-	}
-
-	si_llvm_emit_kill(&ctx->abi, visible);
-}
-
-static void emit_icmp(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	unsigned pred;
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	switch (emit_data->inst->Instruction.Opcode) {
-	case TGSI_OPCODE_USEQ:
-	case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
-	case TGSI_OPCODE_USNE:
-	case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
-	case TGSI_OPCODE_USGE:
-	case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
-	case TGSI_OPCODE_USLT:
-	case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
-	case TGSI_OPCODE_ISGE:
-	case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
-	case TGSI_OPCODE_ISLT:
-	case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
-	default:
-		assert(!"unknown instruction");
-		pred = 0;
-		break;
-	}
-
-	LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, pred,
-			emit_data->args[0], emit_data->args[1],"");
-
-	v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
-
-	emit_data->output[emit_data->chan] = v;
-}
-
-static void emit_ucmp(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef arg0 = ac_to_integer(&ctx->ac, emit_data->args[0]);
-
-	LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, arg0,
-				       ctx->i32_0, "");
-
-	emit_data->output[emit_data->chan] =
-		LLVMBuildSelect(ctx->ac.builder, v, emit_data->args[1], emit_data->args[2], "");
-}
-
-static void emit_cmp(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef cond, *args = emit_data->args;
-
-	cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealOLT, args[0],
-			     ctx->ac.f32_0, "");
-
-	emit_data->output[emit_data->chan] =
-		LLVMBuildSelect(ctx->ac.builder, cond, args[1], args[2], "");
-}
-
-static void emit_set_cond(const struct lp_build_tgsi_action *action,
-			  struct lp_build_tgsi_context *bld_base,
-			  struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMRealPredicate pred;
-	LLVMValueRef cond;
-
-	/* Use ordered for everything but NE (which is usual for
-	 * float comparisons)
-	 */
-	switch (emit_data->inst->Instruction.Opcode) {
-	case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
-	case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
-	case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
-	case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
-	case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
-	case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
-	default: assert(!"unknown instruction"); pred = 0; break;
-	}
-
-	cond = LLVMBuildFCmp(ctx->ac.builder,
-		pred, emit_data->args[0], emit_data->args[1], "");
-
-	emit_data->output[emit_data->chan] = LLVMBuildSelect(ctx->ac.builder,
-		cond, ctx->ac.f32_1, ctx->ac.f32_0, "");
-}
-
-static void emit_fcmp(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMRealPredicate pred;
-
-	/* Use ordered for everything but NE (which is usual for
-	 * float comparisons)
-	 */
-	switch (emit_data->inst->Instruction.Opcode) {
-	case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
-	case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
-	case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
-	case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
-	default: assert(!"unknown instruction"); pred = 0; break;
-	}
-
-	LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
-			emit_data->args[0], emit_data->args[1],"");
-
-	v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
-
-	emit_data->output[emit_data->chan] = v;
-}
-
-static void emit_dcmp(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMRealPredicate pred;
-
-	/* Use ordered for everything but NE (which is usual for
-	 * float comparisons)
-	 */
-	switch (emit_data->inst->Instruction.Opcode) {
-	case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
-	case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
-	case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
-	case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
-	default: assert(!"unknown instruction"); pred = 0; break;
-	}
-
-	LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
-			emit_data->args[0], emit_data->args[1],"");
-
-	v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
-
-	emit_data->output[emit_data->chan] = v;
-}
-
-static void emit_not(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef v = ac_to_integer(&ctx->ac, emit_data->args[0]);
-	emit_data->output[emit_data->chan] = LLVMBuildNot(ctx->ac.builder, v, "");
-}
-
-static void emit_arl(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef floor_index =
-		ac_build_intrinsic(&ctx->ac, "llvm.floor.f32", ctx->f32,
-				   &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE);
-	emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
-			floor_index, ctx->i32, "");
-}
-
-static void emit_and(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildAnd(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_or(const struct lp_build_tgsi_action *action,
-		    struct lp_build_tgsi_context *bld_base,
-		    struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildOr(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_uadd(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildAdd(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_udiv(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildUDiv(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_idiv(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildSDiv(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_mod(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildSRem(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_umod(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildURem(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_shl(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildShl(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_ushr(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildLShr(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-static void emit_ishr(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildAShr(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_xor(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildXor(ctx->ac.builder,
-			emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_ssg(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	LLVMValueRef  val;
-
-	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
-		val = ac_build_isign(&ctx->ac, emit_data->args[0], 64);
-	} else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
-		val = ac_build_isign(&ctx->ac, emit_data->args[0], 32);
-	} else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_DSSG) {
-		val = ac_build_fsign(&ctx->ac, emit_data->args[0], 64);
-	} else {
-		val = ac_build_fsign(&ctx->ac, emit_data->args[0], 32);
-	}
-
-	emit_data->output[emit_data->chan] = val;
-}
-
-static void emit_ineg(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildNeg(ctx->ac.builder,
-			emit_data->args[0], "");
-}
-
-static void emit_dneg(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildFNeg(ctx->ac.builder,
-			emit_data->args[0], "");
-}
-
-static void emit_frac(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	unsigned bitsize;
-
-	if (emit_data->info->opcode == TGSI_OPCODE_FRC)
-		bitsize = 32;
-	else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
-		bitsize = 64;
-	else {
-		assert(0);
-		return;
-	}
-
-	emit_data->output[emit_data->chan] =
-		ac_build_fract(&ctx->ac, emit_data->args[0], bitsize);
-}
-
-static void emit_f2i(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
-			emit_data->args[0], ctx->i32, "");
-}
-
-static void emit_f2u(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildFPToUI(ctx->ac.builder,
-			emit_data->args[0], ctx->i32, "");
-}
-
-static void emit_i2f(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildSIToFP(ctx->ac.builder,
-			emit_data->args[0], ctx->f32, "");
-}
-
-static void emit_u2f(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] = LLVMBuildUIToFP(ctx->ac.builder,
-			emit_data->args[0], ctx->f32, "");
-}
-
-static void
-build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
-			   struct lp_build_tgsi_context *bld_base,
-			   struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] =
-		ac_build_intrinsic(&ctx->ac, action->intr_name,
-				   emit_data->dst_type, emit_data->args,
-				   emit_data->arg_count, AC_FUNC_ATTR_READNONE);
-}
-
-static void emit_bfi(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef bfi_args[3];
-	LLVMValueRef bfi_sm5;
-	LLVMValueRef cond;
-
-	// Calculate the bitmask: (((1 << src3) - 1) << src2
-	bfi_args[0] = LLVMBuildShl(builder,
-				   LLVMBuildSub(builder,
-						LLVMBuildShl(builder,
-							     ctx->i32_1,
-							     emit_data->args[3], ""),
-						ctx->i32_1, ""),
-				   emit_data->args[2], "");
-
-	bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
-				   emit_data->args[2], "");
-
-	bfi_args[2] = emit_data->args[0];
-
-	/* Calculate:
-	 *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
-	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
-	 */
-	bfi_sm5 =
-		LLVMBuildXor(builder, bfi_args[2],
-			LLVMBuildAnd(builder, bfi_args[0],
-				LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
-					     ""), ""), "");
-
-	/* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
-	 * uses the convenient V_BFI lowering for the above, which follows SM5
-	 * and disagrees with GLSL semantics when bits (src3) is 32.
-	 */
-	cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
-			     LLVMConstInt(ctx->i32, 32, 0), "");
-	emit_data->output[emit_data->chan] =
-		LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
-}
-
-static void emit_bfe(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	/* FIXME: LLVM 7 returns incorrect result when count is 0.
-	 * https://bugs.freedesktop.org/show_bug.cgi?id=107276
-	 */
-	LLVMValueRef zero = ctx->i32_0;
-	LLVMValueRef bfe_sm5 =
-		ac_build_bfe(&ctx->ac, emit_data->args[0],
-			     emit_data->args[1], emit_data->args[2],
-			     emit_data->info->opcode == TGSI_OPCODE_IBFE);
-
-	/* Correct for GLSL semantics. */
-	LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
-					  LLVMConstInt(ctx->i32, 32, 0), "");
-	LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2],
-					   zero, "");
-	bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
-	emit_data->output[emit_data->chan] =
-		LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, "");
-}
-
-/* this is ffs in C */
-static void emit_lsb(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	emit_data->output[emit_data->chan] = ac_find_lsb(&ctx->ac, emit_data->dst_type, emit_data->args[0]);
-}
-
-/* Find the last bit set. */
-static void emit_umsb(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	emit_data->output[emit_data->chan] =
-		ac_build_umsb(&ctx->ac, emit_data->args[0], emit_data->dst_type);
-}
-
-/* Find the last bit opposite of the sign bit. */
-static void emit_imsb(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	emit_data->output[emit_data->chan] =
-		ac_build_imsb(&ctx->ac, emit_data->args[0],
-			      emit_data->dst_type);
-}
-
-static void emit_iabs(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	emit_data->output[emit_data->chan] =
-		ac_build_imax(&ctx->ac,  emit_data->args[0],
-			      LLVMBuildNeg(ctx->ac.builder, emit_data->args[0], ""));
-}
-
-static void emit_minmax_int(const struct lp_build_tgsi_action *action,
-			    struct lp_build_tgsi_context *bld_base,
-			    struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMIntPredicate op;
-
-	switch (emit_data->info->opcode) {
-	default:
-		assert(0);
-	case TGSI_OPCODE_IMAX:
-	case TGSI_OPCODE_I64MAX:
-		op = LLVMIntSGT;
-		break;
-	case TGSI_OPCODE_IMIN:
-	case TGSI_OPCODE_I64MIN:
-		op = LLVMIntSLT;
-		break;
-	case TGSI_OPCODE_UMAX:
-	case TGSI_OPCODE_U64MAX:
-		op = LLVMIntUGT;
-		break;
-	case TGSI_OPCODE_UMIN:
-	case TGSI_OPCODE_U64MIN:
-		op = LLVMIntULT;
-		break;
-	}
-
-	emit_data->output[emit_data->chan] =
-		LLVMBuildSelect(ctx->ac.builder,
-				LLVMBuildICmp(ctx->ac.builder, op, emit_data->args[0],
-					      emit_data->args[1], ""),
-				emit_data->args[0],
-				emit_data->args[1], "");
-}
-
-static void emit_pk2h(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef v[] = {
-		lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X),
-		lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y),
-	};
-
-
-	/* From the GLSL 4.50 spec:
-	 *   "The rounding mode cannot be set and is undefined."
-	 *
-	 * v_cvt_pkrtz_f16 rounds to zero, but it's fastest.
-	 */
-	emit_data->output[emit_data->chan] =
-		LLVMBuildBitCast(ctx->ac.builder, ac_build_cvt_pkrtz_f16(&ctx->ac, v),
-				 ctx->i32, "");
-}
-
-static void emit_up2h(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMTypeRef i16;
-	LLVMValueRef const16, input, val;
-	unsigned i;
-
-	i16 = LLVMInt16TypeInContext(ctx->ac.context);
-	const16 = LLVMConstInt(ctx->i32, 16, 0);
-	input = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
-
-	for (i = 0; i < 2; i++) {
-		val = i == 1 ? LLVMBuildLShr(ctx->ac.builder, input, const16, "") : input;
-		val = LLVMBuildTrunc(ctx->ac.builder, val, i16, "");
-		val = ac_to_float(&ctx->ac, val);
-		emit_data->output[i] = LLVMBuildFPExt(ctx->ac.builder, val, ctx->f32, "");
-	}
-}
-
-static void emit_fdiv(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	emit_data->output[emit_data->chan] =
-		ac_build_fdiv(&ctx->ac, emit_data->args[0], emit_data->args[1]);
-}
-
-/* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
- * the target machine. f64 needs global unsafe math flags to get rsq. */
-static void emit_rsq(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	LLVMValueRef sqrt =
-		ac_build_intrinsic(&ctx->ac, "llvm.sqrt.f32", ctx->f32,
-				   &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE);
-
-	emit_data->output[emit_data->chan] =
-		ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, sqrt);
-}
-
-static void dfracexp_emit(const struct lp_build_tgsi_action *action,
-			  struct lp_build_tgsi_context *bld_base,
-			  struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef in = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
-
-	emit_data->output[emit_data->chan] =
-		ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.mant.f64",
-				   ctx->ac.f64, &in, 1, 0);
-	emit_data->output1[emit_data->chan] =
-		ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.exp.i32.f64",
-				   ctx->ac.i32, &in, 1, 0);
-}
-
-void si_shader_context_init_alu(struct si_shader_context *ctx)
-{
-	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
-	lp_set_default_actions(bld_base);
-
-	bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
-	bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
-	bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
-	bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.bitreverse.i32";
-	bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
-	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
-	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
-	bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
-	bld_base->op_actions[TGSI_OPCODE_DCEIL].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_DCEIL].intr_name = "llvm.ceil.f64";
-	bld_base->op_actions[TGSI_OPCODE_DFLR].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_DFLR].intr_name = "llvm.floor.f64";
-	bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
-	bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
-	bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
-	bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
-	bld_base->op_actions[TGSI_OPCODE_DROUND].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_DROUND].intr_name = "llvm.rint.f64";
-	bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
-	bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
-	bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
-	bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
-	bld_base->op_actions[TGSI_OPCODE_DSSG].emit = emit_ssg;
-	bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.amdgcn.rsq.f64";
-	bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
-	bld_base->op_actions[TGSI_OPCODE_DTRUNC].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_DTRUNC].intr_name = "llvm.trunc.f64";
-	bld_base->op_actions[TGSI_OPCODE_DFRACEXP].emit = dfracexp_emit;
-	bld_base->op_actions[TGSI_OPCODE_DLDEXP].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_DLDEXP].intr_name = "llvm.amdgcn.ldexp.f64";
-	bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
-	bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
-
-	/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
-	if (ctx->screen->info.chip_class >= GFX10) {
-		bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
-		bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
-	} else {
-		bld_base->op_actions[TGSI_OPCODE_FMA].emit =
-			bld_base->op_actions[TGSI_OPCODE_MAD].emit;
-	}
-
-	bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
-	bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
-	bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
-	bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
-	bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
-	bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
-	bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
-	bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
-	bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
-	bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
-	bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
-	bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
-	bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
-	bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
-	bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
-	bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
-	bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
-	bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
-	bld_base->op_actions[TGSI_OPCODE_KILL].emit = kil_emit;
-	bld_base->op_actions[TGSI_OPCODE_LDEXP].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_LDEXP].intr_name = "llvm.amdgcn.ldexp.f32";
-	bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
-	bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
-	bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
-	bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
-	bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
-	bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
-	bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
-	bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
-	bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
-	bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
-	bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
-	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
-	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
-	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
-	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
-	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
-	bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
-	bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
-	bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
-	bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
-	bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
-	bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
-	bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
-	bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
-	bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
-	bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
-	bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
-	bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
-	bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
-	bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
-	bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
-	bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
-	bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
-	bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
-	bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
-
-	bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
-	bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
-	bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
-	bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
-	bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
-	bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
-	bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;
-
-	bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
-	bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;
-
-	bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
-	bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
-	bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
-	bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
-
-	bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
-	bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
-	bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
-	bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
-}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
deleted file mode 100644
index 21b861b8244..00000000000
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ /dev/null
@@ -1,1852 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <llvm/Config/llvm-config.h>
-
-#include "si_shader_internal.h"
-#include "si_pipe.h"
-#include "sid.h"
-#include "tgsi/tgsi_build.h"
-#include "tgsi/tgsi_util.h"
-#include "ac_llvm_util.h"
-
-static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
-			   struct lp_build_emit_data *emit_data,
-			   LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
-			   LLVMValueRef *fmask_ptr);
-
-/**
- * Given a v8i32 resource descriptor for a buffer, extract the size of the
- * buffer in number of elements and return it as an i32.
- */
-static LLVMValueRef get_buffer_size(
-	struct lp_build_tgsi_context *bld_base,
-	LLVMValueRef descriptor)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef size =
-		LLVMBuildExtractElement(builder, descriptor,
-					LLVMConstInt(ctx->i32, 2, 0), "");
-
-	if (ctx->screen->info.chip_class == GFX8) {
-		/* On GFX8, the descriptor contains the size in bytes,
-		 * but TXQ must return the size in elements.
-		 * The stride is always non-zero for resources using TXQ.
-		 */
-		LLVMValueRef stride =
-			LLVMBuildExtractElement(builder, descriptor,
-						ctx->i32_1, "");
-		stride = LLVMBuildLShr(builder, stride,
-				       LLVMConstInt(ctx->i32, 16, 0), "");
-		stride = LLVMBuildAnd(builder, stride,
-				      LLVMConstInt(ctx->i32, 0x3FFF, 0), "");
-
-		size = LLVMBuildUDiv(builder, size, stride, "");
-	}
-
-	return size;
-}
-
-static LLVMValueRef
-shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
-			 const struct tgsi_full_src_register *reg,
-			 bool ubo)
-{
-	LLVMValueRef index;
-
-	if (!reg->Register.Indirect) {
-		index = LLVMConstInt(ctx->i32, reg->Register.Index, false);
-	} else {
-		index = si_get_indirect_index(ctx, &reg->Indirect,
-					      1, reg->Register.Index);
-	}
-
-	if (ubo)
-		return ctx->abi.load_ubo(&ctx->abi, index);
-	else
-		return ctx->abi.load_ssbo(&ctx->abi, index, false);
-}
-
-static enum ac_image_dim
-ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
-{
-	switch (target) {
-	case TGSI_TEXTURE_1D:
-	case TGSI_TEXTURE_SHADOW1D:
-		if (screen->info.chip_class == GFX9)
-			return ac_image_2d;
-		return ac_image_1d;
-	case TGSI_TEXTURE_2D:
-	case TGSI_TEXTURE_SHADOW2D:
-	case TGSI_TEXTURE_RECT:
-	case TGSI_TEXTURE_SHADOWRECT:
-		return ac_image_2d;
-	case TGSI_TEXTURE_3D:
-		return ac_image_3d;
-	case TGSI_TEXTURE_CUBE:
-	case TGSI_TEXTURE_SHADOWCUBE:
-	case TGSI_TEXTURE_CUBE_ARRAY:
-	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
-		return ac_image_cube;
-	case TGSI_TEXTURE_1D_ARRAY:
-	case TGSI_TEXTURE_SHADOW1D_ARRAY:
-		if (screen->info.chip_class == GFX9)
-			return ac_image_2darray;
-		return ac_image_1darray;
-	case TGSI_TEXTURE_2D_ARRAY:
-	case TGSI_TEXTURE_SHADOW2D_ARRAY:
-		return ac_image_2darray;
-	case TGSI_TEXTURE_2D_MSAA:
-		return ac_image_2dmsaa;
-	case TGSI_TEXTURE_2D_ARRAY_MSAA:
-		return ac_image_2darraymsaa;
-	default:
-		unreachable("unhandled texture type");
-	}
-}
-
-static enum ac_image_dim
-ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
-{
-	enum ac_image_dim dim = ac_texture_dim_from_tgsi_target(screen, target);
-
-	/* Match the resource type set in the descriptor. */
-	if (dim == ac_image_cube ||
-	    (screen->info.chip_class <= GFX8 && dim == ac_image_3d))
-		dim = ac_image_2darray;
-	else if (target == TGSI_TEXTURE_2D && screen->info.chip_class == GFX9) {
-		/* When a single layer of a 3D texture is bound, the shader
-		 * will refer to a 2D target, but the descriptor has a 3D type.
-		 * Since the HW ignores BASE_ARRAY in this case, we need to
-		 * send 3 coordinates. This doesn't hurt when the underlying
-		 * texture is non-3D.
-		 */
-		dim = ac_image_3d;
-	}
-
-	return dim;
-}
-
-/**
- * Given a 256-bit resource descriptor, force the DCC enable bit to off.
- *
- * At least on Tonga, executing image stores on images with DCC enabled and
- * non-trivial can eventually lead to lockups. This can occur when an
- * application binds an image as read-only but then uses a shader that writes
- * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
- * program termination) in this case, but it doesn't cost much to be a bit
- * nicer: disabling DCC in the shader still leads to undefined results but
- * avoids the lockup.
- */
-static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
-				  LLVMValueRef rsrc)
-{
-	if (ctx->screen->info.chip_class <= GFX7) {
-		return rsrc;
-	} else {
-		LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
-		LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
-		LLVMValueRef tmp;
-
-		tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
-		tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
-		return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
-	}
-}
-
-/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
- * adjust "index" to point to FMASK. */
-LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
-				LLVMValueRef list, LLVMValueRef index,
-				enum ac_descriptor_type desc_type,
-				bool uses_store, bool bindless)
-{
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef rsrc;
-
-	if (desc_type == AC_DESC_BUFFER) {
-		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
-				      ctx->i32_1);
-		list = LLVMBuildPointerCast(builder, list,
-					    ac_array_in_const32_addr_space(ctx->v4i32), "");
-	} else {
-		assert(desc_type == AC_DESC_IMAGE ||
-		       desc_type == AC_DESC_FMASK);
-	}
-
-	if (bindless)
-		rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
-	else
-		rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
-
-	if (desc_type == AC_DESC_IMAGE && uses_store)
-		rsrc = force_dcc_off(ctx, rsrc);
-	return rsrc;
-}
-
-/**
- * Load the resource descriptor for \p image.
- */
-static void
-image_fetch_rsrc(
-	struct lp_build_tgsi_context *bld_base,
-	const struct tgsi_full_src_register *image,
-	bool fmask, bool is_store, unsigned target,
-	LLVMValueRef *rsrc)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	bool bindless = image->Register.File != TGSI_FILE_IMAGE;
-	LLVMValueRef rsrc_ptr, index;
-
-	if (bindless) {
-		/* Bindless descriptors are accessible from a different pair of
-		 * user SGPR indices.
-		 */
-		rsrc_ptr = ac_get_arg(&ctx->ac,
-				      ctx->bindless_samplers_and_images);
-		index = lp_build_emit_fetch_src(bld_base, image, TGSI_TYPE_UNSIGNED, 0);
-
-		/* Bindless image descriptors use 16-dword slots. */
-		index = LLVMBuildMul(ctx->ac.builder, index,
-				     LLVMConstInt(ctx->i32, 2, 0), "");
-		/* FMASK is right after the image. */
-		if (fmask)
-			index = LLVMBuildAdd(ctx->ac.builder, index, ctx->i32_1, "");
-	} else {
-		rsrc_ptr = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
-
-		if (!image->Register.Indirect) {
-			index = LLVMConstInt(ctx->i32, image->Register.Index, 0);
-		} else {
-			/* From the GL_ARB_shader_image_load_store extension spec:
-			 *
-			 *    If a shader performs an image load, store, or atomic
-			 *    operation using an image variable declared as an array,
-			 *    and if the index used to select an individual element is
-			 *    negative or greater than or equal to the size of the
-			 *    array, the results of the operation are undefined but may
-			 *    not lead to termination.
-			 */
-			index = si_get_bounded_indirect_index(ctx, &image->Indirect,
-							      image->Register.Index,
-							      ctx->num_images);
-		}
-		/* FMASKs are separate from images. */
-		if (fmask) {
-			index = LLVMBuildAdd(ctx->ac.builder, index,
-					     LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), "");
-		}
-		index = LLVMBuildSub(ctx->ac.builder,
-				     LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
-				     index, "");
-	}
-
-	*rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
-				   fmask ? AC_DESC_FMASK :
-				   target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
-				   is_store, bindless);
-}
-
-static void image_fetch_coords(
-		struct lp_build_tgsi_context *bld_base,
-		const struct tgsi_full_instruction *inst,
-		unsigned src, LLVMValueRef desc,
-		LLVMValueRef *coords)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
-	unsigned target = inst->Memory.Texture;
-	unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
-	LLVMValueRef tmp;
-	int chan;
-
-	for (chan = 0; chan < num_coords; ++chan) {
-		tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
-		tmp = ac_to_integer(&ctx->ac, tmp);
-		coords[chan] = tmp;
-	}
-
-	if (target == TGSI_TEXTURE_2D_MSAA ||
-	    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
-		/* Need the sample index as well. */
-		tmp = lp_build_emit_fetch(bld_base, inst, src, TGSI_SWIZZLE_W);
-		coords[chan] = ac_to_integer(&ctx->ac, tmp);
-	}
-
-	if (ctx->screen->info.chip_class == GFX9) {
-		/* 1D textures are allocated and used as 2D on GFX9. */
-		if (target == TGSI_TEXTURE_1D) {
-			coords[1] = ctx->i32_0;
-		} else if (target == TGSI_TEXTURE_1D_ARRAY) {
-			coords[2] = coords[1];
-			coords[1] = ctx->i32_0;
-		} else if (target == TGSI_TEXTURE_2D) {
-			/* The hw can't bind a slice of a 3D image as a 2D
-			 * image, because it ignores BASE_ARRAY if the target
-			 * is 3D. The workaround is to read BASE_ARRAY and set
-			 * it as the 3rd address operand for all 2D images.
-			 */
-			LLVMValueRef first_layer, const5, mask;
-
-			const5 = LLVMConstInt(ctx->i32, 5, 0);
-			mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
-			first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
-			first_layer = LLVMBuildAnd(builder, first_layer, mask, "");
-
-			coords[2] = first_layer;
-		}
-	}
-}
-
-static unsigned get_cache_policy(struct si_shader_context *ctx,
-				 const struct tgsi_full_instruction *inst,
-				 bool atomic, bool may_store_unaligned,
-				 bool writeonly_memory)
-{
-	unsigned cache_policy = 0;
-
-	if (!atomic &&
-	    /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores.
-	     * All store opcodes not aligned to a dword are affected.
-	     * The only way to get unaligned stores in radeonsi is through
-	     * shader images. */
-	    ((may_store_unaligned && ctx->screen->info.chip_class == GFX6) ||
-	     /* If this is write-only, don't keep data in L1 to prevent
-	      * evicting L1 cache lines that may be needed by other
-	      * instructions. */
-	     writeonly_memory ||
-	     inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))) {
-		cache_policy |= ac_glc;
-	}
-
-	if (inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
-		cache_policy |= ac_slc;
-
-	return cache_policy;
-}
-
-static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
-                                   const struct tgsi_full_instruction *inst,
-                                   LLVMTypeRef type, int arg)
-{
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef offset, ptr;
-	int addr_space;
-
-	offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0);
-	offset = ac_to_integer(&ctx->ac, offset);
-
-	ptr = ctx->ac.lds;
-	ptr = LLVMBuildGEP(builder, ptr, &offset, 1, "");
-	addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
-	ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), "");
-
-	return ptr;
-}
-
-static void load_emit_memory(
-		struct si_shader_context *ctx,
-		struct lp_build_emit_data *emit_data)
-{
-	const struct tgsi_full_instruction *inst = emit_data->inst;
-	unsigned writemask = inst->Dst[0].Register.WriteMask;
-	LLVMValueRef channels[4], ptr, derived_ptr, index;
-	int chan;
-
-	ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);
-
-	for (chan = 0; chan < 4; ++chan) {
-		if (!(writemask & (1 << chan))) {
-			channels[chan] = LLVMGetUndef(ctx->f32);
-			continue;
-		}
-
-		index = LLVMConstInt(ctx->i32, chan, 0);
-		derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
-		channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
-	}
-	emit_data->output[emit_data->chan] = ac_build_gather_values(&ctx->ac, channels, 4);
-}
-
-/**
- * Return true if the memory accessed by a LOAD or STORE instruction is
- * read-only or write-only, respectively.
- *
- * \param shader_buffers_reverse_access_mask
- *	For LOAD, set this to (store | atomic) slot usage in the shader.
- *	For STORE, set this to (load | atomic) slot usage in the shader.
- * \param images_reverse_access_mask  Same as above, but for images.
- * \param bindless_buffer_reverse_access_mask  Same as above, but for bindless image buffers.
- * \param bindless_image_reverse_access_mask   Same as above, but for bindless images.
- */
-static bool is_oneway_access_only(const struct tgsi_full_instruction *inst,
-				  const struct tgsi_shader_info *info,
-				  unsigned shader_buffers_reverse_access_mask,
-				  unsigned images_reverse_access_mask,
-				  bool bindless_buffer_reverse_access_mask,
-				  bool bindless_image_reverse_access_mask)
-{
-	enum tgsi_file_type resource_file;
-	unsigned resource_index;
-	bool resource_indirect;
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_STORE) {
-		resource_file = inst->Dst[0].Register.File;
-		resource_index = inst->Dst[0].Register.Index;
-		resource_indirect = inst->Dst[0].Register.Indirect;
-	} else {
-		resource_file = inst->Src[0].Register.File;
-		resource_index = inst->Src[0].Register.Index;
-		resource_indirect = inst->Src[0].Register.Indirect;
-	}
-
-	assert(resource_file == TGSI_FILE_BUFFER ||
-	       resource_file == TGSI_FILE_IMAGE ||
-	       /* bindless image */
-	       resource_file == TGSI_FILE_INPUT ||
-	       resource_file == TGSI_FILE_OUTPUT ||
-	       resource_file == TGSI_FILE_CONSTANT ||
-	       resource_file == TGSI_FILE_TEMPORARY ||
-	       resource_file == TGSI_FILE_IMMEDIATE);
-
-	assert(resource_file != TGSI_FILE_BUFFER ||
-	       inst->Memory.Texture == TGSI_TEXTURE_BUFFER);
-
-	bool bindless = resource_file != TGSI_FILE_BUFFER &&
-			resource_file != TGSI_FILE_IMAGE;
-
-	/* RESTRICT means NOALIAS.
-	 * If there are no writes, we can assume the accessed memory is read-only.
-	 * If there are no reads, we can assume the accessed memory is write-only.
-	 */
-	if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT && !bindless) {
-		unsigned reverse_access_mask;
-
-		if (resource_file == TGSI_FILE_BUFFER) {
-			reverse_access_mask = shader_buffers_reverse_access_mask;
-		} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
-			reverse_access_mask = info->images_buffers &
-					      images_reverse_access_mask;
-		} else {
-			reverse_access_mask = ~info->images_buffers &
-					      images_reverse_access_mask;
-		}
-
-		if (resource_indirect) {
-			if (!reverse_access_mask)
-				return true;
-		} else {
-			if (!(reverse_access_mask &
-			      (1u << resource_index)))
-				return true;
-		}
-	}
-
-	/* If there are no buffer writes (for both shader buffers & image
-	 * buffers), it implies that buffer memory is read-only.
-	 * If there are no buffer reads (for both shader buffers & image
-	 * buffers), it implies that buffer memory is write-only.
-	 *
-	 * Same for the case when there are no writes/reads for non-buffer
-	 * images.
-	 */
-	if (resource_file == TGSI_FILE_BUFFER ||
-	    inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
-		if (!shader_buffers_reverse_access_mask &&
-		    !(info->images_buffers & images_reverse_access_mask) &&
-		    !bindless_buffer_reverse_access_mask)
-			return true;
-	} else {
-		if (!(~info->images_buffers & images_reverse_access_mask) &&
-		    !bindless_image_reverse_access_mask)
-			return true;
-	}
-	return false;
-}
-
-static void load_emit(
-		const struct lp_build_tgsi_action *action,
-		struct lp_build_tgsi_context *bld_base,
-		struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	const struct tgsi_full_instruction * inst = emit_data->inst;
-	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
-	bool can_speculate = false;
-	LLVMValueRef vindex = ctx->i32_0;
-	LLVMValueRef voffset = ctx->i32_0;
-	struct ac_image_args args = {};
-
-	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
-		load_emit_memory(ctx, emit_data);
-		return;
-	}
-
-	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
-	    inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
-		bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
-		args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);
-		voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0));
-	} else {
-		unsigned target = inst->Memory.Texture;
-
-		image_fetch_rsrc(bld_base, &inst->Src[0], false, false, target, &args.resource);
-		image_fetch_coords(bld_base, inst, 1, args.resource, args.coords);
-
-		if ((inst->Memory.Texture == TGSI_TEXTURE_2D_MSAA ||
-		     inst->Memory.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) &&
-		    !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
-			LLVMValueRef fmask;
-
-			image_fetch_rsrc(bld_base, &inst->Src[0], true, false, target, &fmask);
-			ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
-						 inst->Memory.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
-		}
-		vindex = args.coords[0]; /* for buffers only */
-	}
-
-	if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
-		emit_data->output[emit_data->chan] =
-			ac_build_buffer_load(&ctx->ac, args.resource,
-					     util_last_bit(inst->Dst[0].Register.WriteMask),
-					     NULL, voffset, NULL, 0, 0, true, true);
-		return;
-	}
-
-	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
-		ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
-
-	can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
-			  is_oneway_access_only(inst, info,
-						info->shader_buffers_store |
-						info->shader_buffers_atomic,
-						info->images_store |
-						info->images_atomic,
-						info->uses_bindless_buffer_store |
-						info->uses_bindless_buffer_atomic,
-						info->uses_bindless_image_store |
-						info->uses_bindless_image_atomic);
-	args.cache_policy = get_cache_policy(ctx, inst, false, false, false);
-
-	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
-		/* Don't use SMEM for shader buffer loads, because LLVM doesn't
-		 * select SMEM for SI.load.const with a non-constant offset, and
-		 * constant offsets practically don't exist with shader buffers.
-		 *
-		 * Also, SI.load.const doesn't use inst_offset when it's lowered
-		 * to VMEM, so we just end up with more VALU instructions in the end
-		 * and no benefit.
-		 *
-		 * TODO: Remove this line once LLVM can select SMEM with a non-constant
-		 *       offset, and can derive inst_offset when VMEM is selected.
-		 *       After that, si_memory_barrier should invalidate sL1 for shader
-		 *       buffers.
-		 */
-		emit_data->output[emit_data->chan] =
-			ac_build_buffer_load(&ctx->ac, args.resource,
-					     util_last_bit(inst->Dst[0].Register.WriteMask),
-					     NULL, voffset, NULL, 0,
-					     args.cache_policy, can_speculate, false);
-		return;
-	}
-
-	if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
-		unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask);
-		LLVMValueRef result =
-			ac_build_buffer_load_format(&ctx->ac,
-						    args.resource,
-						    vindex,
-						    ctx->i32_0,
-						    num_channels,
-						    args.cache_policy,
-						    can_speculate);
-		emit_data->output[emit_data->chan] =
-			ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
-	} else {
-		args.opcode = ac_image_load;
-		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
-		args.attributes = ac_get_load_intr_attribs(can_speculate);
-		args.dmask = 0xf;
-
-		emit_data->output[emit_data->chan] =
-			ac_build_image_opcode(&ctx->ac, &args);
-	}
-}
-
-static void store_emit_buffer(struct si_shader_context *ctx,
-			      LLVMValueRef resource,
-			      unsigned writemask,
-			      LLVMValueRef value,
-			      LLVMValueRef voffset,
-			      unsigned cache_policy,
-			      bool writeonly_memory)
-{
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef base_data = value;
-	LLVMValueRef base_offset = voffset;
-
-	while (writemask) {
-		int start, count;
-		LLVMValueRef data, voff;
-
-		u_bit_scan_consecutive_range(&writemask, &start, &count);
-
-		if (count == 3 && ac_has_vec3_support(ctx->ac.chip_class, false)) {
-			LLVMValueRef values[3] = {
-				LLVMBuildExtractElement(builder, base_data,
-							LLVMConstInt(ctx->i32, start, 0), ""),
-				LLVMBuildExtractElement(builder, base_data,
-							LLVMConstInt(ctx->i32, start + 1, 0), ""),
-				LLVMBuildExtractElement(builder, base_data,
-							LLVMConstInt(ctx->i32, start + 2, 0), ""),
-			};
-			data = ac_build_gather_values(&ctx->ac, values, 3);
-		} else if (count >= 3) {
-			data = base_data;
-		} else if (count == 2) {
-			LLVMValueRef values[2] = {
-				LLVMBuildExtractElement(builder, base_data,
-							LLVMConstInt(ctx->i32, start, 0), ""),
-				LLVMBuildExtractElement(builder, base_data,
-							LLVMConstInt(ctx->i32, start + 1, 0), ""),
-			};
-
-			data = ac_build_gather_values(&ctx->ac, values, 2);
-		} else {
-			assert(count == 1);
-			data = LLVMBuildExtractElement(
-				builder, base_data,
-				LLVMConstInt(ctx->i32, start, 0), "");
-		}
-
-		voff = base_offset;
-		if (start != 0) {
-			voff = LLVMBuildAdd(
-				builder, voff,
-				LLVMConstInt(ctx->i32, start * 4, 0), "");
-		}
-
-		ac_build_buffer_store_dword(&ctx->ac, resource, data, count,
-					    voff, ctx->i32_0, 0, cache_policy);
-	}
-}
-
-static void store_emit_memory(
-		struct si_shader_context *ctx,
-		struct lp_build_emit_data *emit_data)
-{
-	const struct tgsi_full_instruction *inst = emit_data->inst;
-	LLVMBuilderRef builder = ctx->ac.builder;
-	unsigned writemask = inst->Dst[0].Register.WriteMask;
-	LLVMValueRef ptr, derived_ptr, data, index;
-	int chan;
-
-	ptr = get_memory_ptr(ctx, inst, ctx->f32, 0);
-
-	for (chan = 0; chan < 4; ++chan) {
-		if (!(writemask & (1 << chan))) {
-			continue;
-		}
-		data = lp_build_emit_fetch(&ctx->bld_base, inst, 1, chan);
-		index = LLVMConstInt(ctx->i32, chan, 0);
-		derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
-		LLVMBuildStore(builder, data, derived_ptr);
-	}
-}
-
-static void store_emit(
-		const struct lp_build_tgsi_action *action,
-		struct lp_build_tgsi_context *bld_base,
-		struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	const struct tgsi_full_instruction * inst = emit_data->inst;
-	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
-	struct tgsi_full_src_register resource_reg =
-		tgsi_full_src_register_from_dst(&inst->Dst[0]);
-	unsigned target = inst->Memory.Texture;
-
-	if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
-		store_emit_memory(ctx, emit_data);
-		return;
-	}
-
-	bool writeonly_memory = is_oneway_access_only(inst, info,
-						      info->shader_buffers_load |
-						      info->shader_buffers_atomic,
-						      info->images_load |
-						      info->images_atomic,
-						      info->uses_bindless_buffer_load |
-						      info->uses_bindless_buffer_atomic,
-						      info->uses_bindless_image_load |
-						      info->uses_bindless_image_atomic);
-	LLVMValueRef chans[4];
-	LLVMValueRef vindex = ctx->i32_0;
-	LLVMValueRef voffset = ctx->i32_0;
-	struct ac_image_args args = {};
-
-	for (unsigned chan = 0; chan < 4; ++chan)
-		chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
-
-	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
-		args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, false);
-		voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 0, 0));
-	} else {
-		image_fetch_rsrc(bld_base, &resource_reg, false, true, target, &args.resource);
-		image_fetch_coords(bld_base, inst, 0, args.resource, args.coords);
-		vindex = args.coords[0]; /* for buffers only */
-	}
-
-	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
-		ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
-
-	bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER;
-	args.cache_policy = get_cache_policy(ctx, inst,
-					     false, /* atomic */
-					     is_image, /* may_store_unaligned */
-					     writeonly_memory);
-
-	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
-		store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask,
-				  ac_build_gather_values(&ctx->ac, chans, 4),
-				  voffset, args.cache_policy, writeonly_memory);
-		return;
-	}
-
-	if (target == TGSI_TEXTURE_BUFFER) {
-		unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask);
-
-		ac_build_buffer_store_format(&ctx->ac, args.resource,
-					     ac_build_gather_values(&ctx->ac, chans, num_channels),
-					     vindex, ctx->i32_0 /* voffset */,
-					     num_channels,
-					     args.cache_policy);
-	} else {
-		args.opcode = ac_image_store;
-		args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4);
-		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
-		args.attributes = AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY;
-		args.dmask = 0xf;
-
-		emit_data->output[emit_data->chan] =
-			ac_build_image_opcode(&ctx->ac, &args);
-	}
-}
-
-static void atomic_emit_memory(struct si_shader_context *ctx,
-                               struct lp_build_emit_data *emit_data) {
-	LLVMBuilderRef builder = ctx->ac.builder;
-	const struct tgsi_full_instruction * inst = emit_data->inst;
-	LLVMValueRef ptr, result, arg;
-	const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
-
-	ptr = get_memory_ptr(ctx, inst, ctx->i32, 1);
-
-	arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0);
-	arg = ac_to_integer(&ctx->ac, arg);
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
-		LLVMValueRef new_data;
-		new_data = lp_build_emit_fetch(&ctx->bld_base,
-		                               inst, 3, 0);
-
-		new_data = ac_to_integer(&ctx->ac, new_data);
-
-		result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, arg, new_data,
-						  sync_scope);
-		result = LLVMBuildExtractValue(builder, result, 0, "");
-	} else {
-		LLVMAtomicRMWBinOp op;
-
-		switch(inst->Instruction.Opcode) {
-			case TGSI_OPCODE_ATOMUADD:
-				op = LLVMAtomicRMWBinOpAdd;
-				break;
-			case TGSI_OPCODE_ATOMXCHG:
-				op = LLVMAtomicRMWBinOpXchg;
-				break;
-			case TGSI_OPCODE_ATOMAND:
-				op = LLVMAtomicRMWBinOpAnd;
-				break;
-			case TGSI_OPCODE_ATOMOR:
-				op = LLVMAtomicRMWBinOpOr;
-				break;
-			case TGSI_OPCODE_ATOMXOR:
-				op = LLVMAtomicRMWBinOpXor;
-				break;
-			case TGSI_OPCODE_ATOMUMIN:
-				op = LLVMAtomicRMWBinOpUMin;
-				break;
-			case TGSI_OPCODE_ATOMUMAX:
-				op = LLVMAtomicRMWBinOpUMax;
-				break;
-			case TGSI_OPCODE_ATOMIMIN:
-				op = LLVMAtomicRMWBinOpMin;
-				break;
-			case TGSI_OPCODE_ATOMIMAX:
-				op = LLVMAtomicRMWBinOpMax;
-				break;
-			default:
-				unreachable("unknown atomic opcode");
-		}
-
-		result = ac_build_atomic_rmw(&ctx->ac, op, ptr, arg, sync_scope);
-	}
-	emit_data->output[emit_data->chan] =
-		LLVMBuildBitCast(builder, result, ctx->f32, "");
-}
-
-static void atomic_emit(
-		const struct lp_build_tgsi_action *action,
-		struct lp_build_tgsi_context *bld_base,
-		struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	const struct tgsi_full_instruction * inst = emit_data->inst;
-	struct ac_image_args args = {};
-	unsigned num_data = 0;
-	LLVMValueRef vindex = ctx->i32_0;
-	LLVMValueRef voffset = ctx->i32_0;
-
-	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
-		atomic_emit_memory(ctx, emit_data);
-		return;
-	}
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
-		/* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
-		 * of arguments, which is reversed relative to TGSI (and GLSL)
-		 */
-		args.data[num_data++] =
-			ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 3, 0));
-	}
-
-	args.data[num_data++] =
-		ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0));
-
-	args.cache_policy = get_cache_policy(ctx, inst, true, false, false);
-
-	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
-		args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
-		voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0));
-	} else {
-		image_fetch_rsrc(bld_base, &inst->Src[0], false, true,
-				inst->Memory.Texture, &args.resource);
-		image_fetch_coords(bld_base, inst, 1, args.resource, args.coords);
-		vindex = args.coords[0]; /* for buffers only */
-	}
-
-	if (inst->Src[0].Register.File != TGSI_FILE_BUFFER &&
-	    inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
-		LLVMValueRef buf_args[7];
-		unsigned num_args = 0;
-
-		buf_args[num_args++] = args.data[0];
-		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
-			buf_args[num_args++] = args.data[1];
-
-		buf_args[num_args++] = args.resource;
-		buf_args[num_args++] = vindex;
-		buf_args[num_args++] = voffset;
-		buf_args[num_args++] = ctx->i32_0; /* soffset */
-		buf_args[num_args++] = LLVMConstInt(ctx->i32, args.cache_policy & ac_slc, 0);
-
-		char intrinsic_name[64];
-		snprintf(intrinsic_name, sizeof(intrinsic_name),
-			 "llvm.amdgcn.struct.buffer.atomic.%s", action->intr_name);
-		emit_data->output[emit_data->chan] =
-			ac_to_float(&ctx->ac,
-				    ac_build_intrinsic(&ctx->ac, intrinsic_name,
-						       ctx->i32, buf_args, num_args, 0));
-		return;
-	}
-
-	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
-		LLVMValueRef buf_args[7];
-		unsigned num_args = 0;
-
-		buf_args[num_args++] = args.data[0];
-		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
-			buf_args[num_args++] = args.data[1];
-
-		buf_args[num_args++] = args.resource;
-		buf_args[num_args++] = vindex;
-		buf_args[num_args++] = voffset;
-		buf_args[num_args++] = args.cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
-
-		char intrinsic_name[40];
-		snprintf(intrinsic_name, sizeof(intrinsic_name),
-			 "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
-		emit_data->output[emit_data->chan] =
-			ac_to_float(&ctx->ac,
-				    ac_build_intrinsic(&ctx->ac, intrinsic_name,
-						       ctx->i32, buf_args, num_args, 0));
-	} else {
-		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
-			args.opcode = ac_image_atomic_cmpswap;
-		} else {
-			args.opcode = ac_image_atomic;
-			switch (inst->Instruction.Opcode) {
-			case TGSI_OPCODE_ATOMXCHG: args.atomic = ac_atomic_swap; break;
-			case TGSI_OPCODE_ATOMUADD: args.atomic = ac_atomic_add; break;
-			case TGSI_OPCODE_ATOMAND: args.atomic = ac_atomic_and; break;
-			case TGSI_OPCODE_ATOMOR: args.atomic = ac_atomic_or; break;
-			case TGSI_OPCODE_ATOMXOR: args.atomic = ac_atomic_xor; break;
-			case TGSI_OPCODE_ATOMUMIN: args.atomic = ac_atomic_umin; break;
-			case TGSI_OPCODE_ATOMUMAX: args.atomic = ac_atomic_umax; break;
-			case TGSI_OPCODE_ATOMIMIN: args.atomic = ac_atomic_smin; break;
-			case TGSI_OPCODE_ATOMIMAX: args.atomic = ac_atomic_smax; break;
-			case TGSI_OPCODE_ATOMINC_WRAP:
-				args.atomic = ac_atomic_inc_wrap;
-				break;
-			case TGSI_OPCODE_ATOMDEC_WRAP:
-				args.atomic = ac_atomic_dec_wrap;
-				break;
-			default: unreachable("unhandled image atomic");
-			}
-		}
-
-		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
-		emit_data->output[emit_data->chan] =
-			ac_to_float(&ctx->ac, ac_build_image_opcode(&ctx->ac, &args));
-	}
-}
-
-static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
-				unsigned target, LLVMValueRef out)
-{
-	LLVMBuilderRef builder = ctx->ac.builder;
-
-	/* 1D textures are allocated and used as 2D on GFX9. */
-        if (ctx->screen->info.chip_class == GFX9 &&
-	    (target == TGSI_TEXTURE_1D_ARRAY ||
-	     target == TGSI_TEXTURE_SHADOW1D_ARRAY)) {
-		LLVMValueRef layers =
-			LLVMBuildExtractElement(builder, out,
-						LLVMConstInt(ctx->i32, 2, 0), "");
-		out = LLVMBuildInsertElement(builder, out, layers,
-					     ctx->i32_1, "");
-	}
-
-	/* Divide the number of layers by 6 to get the number of cubes. */
-	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
-	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
-		LLVMValueRef imm2 = LLVMConstInt(ctx->i32, 2, 0);
-
-		LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
-		z = LLVMBuildSDiv(builder, z, LLVMConstInt(ctx->i32, 6, 0), "");
-
-		out = LLVMBuildInsertElement(builder, out, z, imm2, "");
-	}
-	return out;
-}
-
-static void resq_emit(
-		const struct lp_build_tgsi_action *action,
-		struct lp_build_tgsi_context *bld_base,
-		struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
-	const struct tgsi_full_instruction *inst = emit_data->inst;
-	const struct tgsi_full_src_register *reg =
-		&inst->Src[inst->Instruction.Opcode == TGSI_OPCODE_TXQ ? 1 : 0];
-
-	if (reg->Register.File == TGSI_FILE_BUFFER) {
-		LLVMValueRef rsrc = shader_buffer_fetch_rsrc(ctx, reg, false);
-
-		emit_data->output[emit_data->chan] =
-			LLVMBuildExtractElement(builder, rsrc,
-						LLVMConstInt(ctx->i32, 2, 0), "");
-		return;
-	}
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
-	    inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
-		LLVMValueRef rsrc;
-
-		tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL);
-		/* Read the size from the buffer descriptor directly. */
-		emit_data->output[emit_data->chan] =
-			get_buffer_size(bld_base, rsrc);
-		return;
-	}
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ &&
-	    inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
-		LLVMValueRef rsrc;
-
-		image_fetch_rsrc(bld_base, reg, false, false, inst->Memory.Texture, &rsrc);
-		emit_data->output[emit_data->chan] =
-			get_buffer_size(bld_base, rsrc);
-		return;
-	}
-
-	unsigned target;
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
-		target = inst->Texture.Texture;
-	} else {
-		if (inst->Memory.Texture == TGSI_TEXTURE_3D)
-			target = TGSI_TEXTURE_2D_ARRAY;
-		else
-			target = inst->Memory.Texture;
-	}
-
-	struct ac_image_args args = {};
-	args.opcode = ac_image_get_resinfo;
-	args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
-	args.dmask = 0xf;
-	args.attributes = AC_FUNC_ATTR_READNONE;
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
-		tex_fetch_ptrs(bld_base, emit_data, &args.resource, NULL, NULL);
-		args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
-	} else {
-		image_fetch_rsrc(bld_base, reg, false, false, target, &args.resource);
-		args.lod = ctx->i32_0;
-	}
-
-	emit_data->output[emit_data->chan] =
-		fix_resinfo(ctx, target, ac_build_image_opcode(&ctx->ac, &args));
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ &&
-	    (target == TGSI_TEXTURE_2D_MSAA ||
-	     target == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
-		LLVMValueRef samples =
-			ac_build_image_get_sample_count(&ctx->ac, args.resource);
-
-		emit_data->output[emit_data->chan] =
-			LLVMBuildInsertElement(ctx->ac.builder,
-					       emit_data->output[emit_data->chan],
-					       samples,
-					       LLVMConstInt(ctx->i32, 3, 0), "");
-	}
-}
-
-/**
- * Load an image view, fmask view. or sampler state descriptor.
- */
-LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
-				  LLVMValueRef list, LLVMValueRef index,
-				  enum ac_descriptor_type type)
-{
-	LLVMBuilderRef builder = ctx->ac.builder;
-
-	switch (type) {
-	case AC_DESC_IMAGE:
-		/* The image is at [0:7]. */
-		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
-		break;
-	case AC_DESC_BUFFER:
-		/* The buffer is in [4:7]. */
-		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
-				      ctx->i32_1);
-		list = LLVMBuildPointerCast(builder, list,
-					    ac_array_in_const32_addr_space(ctx->v4i32), "");
-		break;
-	case AC_DESC_FMASK:
-		/* The FMASK is at [8:15]. */
-		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
-				      ctx->i32_1);
-		break;
-	case AC_DESC_SAMPLER:
-		/* The sampler state is at [12:15]. */
-		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
-				      LLVMConstInt(ctx->i32, 3, 0));
-		list = LLVMBuildPointerCast(builder, list,
-					    ac_array_in_const32_addr_space(ctx->v4i32), "");
-		break;
-	case AC_DESC_PLANE_0:
-	case AC_DESC_PLANE_1:
-	case AC_DESC_PLANE_2:
-		/* Only used for the multiplane image support for Vulkan. Should
-		 * never be reached in radeonsi.
-		 */
-		unreachable("Plane descriptor requested in radeonsi.");
-	}
-
-	return ac_build_load_to_sgpr(&ctx->ac, list, index);
-}
-
-/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
- *
- * GFX6-GFX7:
- *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
- *   filtering manually. The driver sets img7 to a mask clearing
- *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
- *     s_and_b32 samp0, samp0, img7
- *
- * GFX8:
- *   The ANISO_OVERRIDE sampler field enables this fix in TA.
- */
-static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
-					   LLVMValueRef res, LLVMValueRef samp)
-{
-	LLVMValueRef img7, samp0;
-
-	if (ctx->screen->info.chip_class >= GFX8)
-		return samp;
-
-	img7 = LLVMBuildExtractElement(ctx->ac.builder, res,
-				       LLVMConstInt(ctx->i32, 7, 0), "");
-	samp0 = LLVMBuildExtractElement(ctx->ac.builder, samp,
-					ctx->i32_0, "");
-	samp0 = LLVMBuildAnd(ctx->ac.builder, samp0, img7, "");
-	return LLVMBuildInsertElement(ctx->ac.builder, samp, samp0,
-				      ctx->i32_0, "");
-}
-
-static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
-			   struct lp_build_emit_data *emit_data,
-			   LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
-			   LLVMValueRef *fmask_ptr)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
-	const struct tgsi_full_instruction *inst = emit_data->inst;
-	const struct tgsi_full_src_register *reg;
-	unsigned target = inst->Texture.Texture;
-	unsigned sampler_src;
-	LLVMValueRef index;
-
-	sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
-	reg = &emit_data->inst->Src[sampler_src];
-
-	if (reg->Register.Indirect) {
-		index = si_get_bounded_indirect_index(ctx,
-						      &reg->Indirect,
-						      reg->Register.Index,
-						      ctx->num_samplers);
-		index = LLVMBuildAdd(ctx->ac.builder, index,
-				     LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
-	} else {
-		index = LLVMConstInt(ctx->i32,
-				     si_get_sampler_slot(reg->Register.Index), 0);
-	}
-
-	if (reg->Register.File != TGSI_FILE_SAMPLER) {
-		/* Bindless descriptors are accessible from a different pair of
-		 * user SGPR indices.
-		 */
-		list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images);
-		index = lp_build_emit_fetch_src(bld_base, reg,
-						TGSI_TYPE_UNSIGNED, 0);
-
-		/* Since bindless handle arithmetic can contain an unsigned integer
-		 * wraparound and si_load_sampler_desc assumes there isn't any,
-		 * use GEP without "inbounds" (inside ac_build_pointer_add)
-		 * to prevent incorrect code generation and hangs.
-		 */
-		index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
-		list = ac_build_pointer_add(&ctx->ac, list, index);
-		index = ctx->i32_0;
-	}
-
-	if (target == TGSI_TEXTURE_BUFFER)
-		*res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_BUFFER);
-	else
-		*res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_IMAGE);
-
-	if (samp_ptr)
-		*samp_ptr = NULL;
-	if (fmask_ptr)
-		*fmask_ptr = NULL;
-
-	if (target == TGSI_TEXTURE_2D_MSAA ||
-	    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
-		if (fmask_ptr)
-			*fmask_ptr = si_load_sampler_desc(ctx, list, index,
-						          AC_DESC_FMASK);
-	} else if (target != TGSI_TEXTURE_BUFFER) {
-		if (samp_ptr) {
-			*samp_ptr = si_load_sampler_desc(ctx, list, index,
-						         AC_DESC_SAMPLER);
-			*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
-		}
-	}
-}
-
-/* Gather4 should follow the same rules as bilinear filtering, but the hardware
- * incorrectly forces nearest filtering if the texture format is integer.
- * The only effect it has on Gather4, which always returns 4 texels for
- * bilinear filtering, is that the final coordinates are off by 0.5 of
- * the texel size.
- *
- * The workaround is to subtract 0.5 from the unnormalized coordinates,
- * or (0.5 / size) from the normalized coordinates.
- *
- * However, cube textures with 8_8_8_8 data formats require a different
- * workaround of overriding the num format to USCALED/SSCALED. This would lose
- * precision in 32-bit data formats, so it needs to be applied dynamically at
- * runtime. In this case, return an i1 value that indicates whether the
- * descriptor was overridden (and hence a fixup of the sampler result is needed).
- */
-static LLVMValueRef
-si_lower_gather4_integer(struct si_shader_context *ctx,
-			 struct ac_image_args *args,
-			 unsigned target,
-			 enum tgsi_return_type return_type)
-{
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef wa_8888 = NULL;
-	LLVMValueRef half_texel[2];
-
-	assert(return_type == TGSI_RETURN_TYPE_SINT ||
-	       return_type == TGSI_RETURN_TYPE_UINT);
-
-	if (target == TGSI_TEXTURE_CUBE ||
-	    target == TGSI_TEXTURE_CUBE_ARRAY) {
-		LLVMValueRef formats;
-		LLVMValueRef data_format;
-		LLVMValueRef wa_formats;
-
-		formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, "");
-
-		data_format = LLVMBuildLShr(builder, formats,
-					    LLVMConstInt(ctx->i32, 20, false), "");
-		data_format = LLVMBuildAnd(builder, data_format,
-					   LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
-		wa_8888 = LLVMBuildICmp(
-			builder, LLVMIntEQ, data_format,
-			LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
-			"");
-
-		uint32_t wa_num_format =
-			return_type == TGSI_RETURN_TYPE_UINT ?
-			S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) :
-			S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
-		wa_formats = LLVMBuildAnd(builder, formats,
-					  LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false),
-					  "");
-		wa_formats = LLVMBuildOr(builder, wa_formats,
-					LLVMConstInt(ctx->i32, wa_num_format, false), "");
-
-		formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, "");
-		args->resource = LLVMBuildInsertElement(
-			builder, args->resource, formats, ctx->i32_1, "");
-	}
-
-	if (target == TGSI_TEXTURE_RECT ||
-	    target == TGSI_TEXTURE_SHADOWRECT) {
-		assert(!wa_8888);
-		half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
-	} else {
-		struct ac_image_args resinfo = {};
-		struct lp_build_if_state if_ctx;
-
-		if (wa_8888) {
-			/* Skip the texture size query entirely if we don't need it. */
-			lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, ""));
-		}
-
-		/* Query the texture size. */
-		resinfo.opcode = ac_image_get_resinfo;
-		resinfo.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
-		resinfo.resource = args->resource;
-		resinfo.sampler = args->sampler;
-		resinfo.lod = ctx->ac.i32_0;
-		resinfo.dmask = 0xf;
-		resinfo.attributes = AC_FUNC_ATTR_READNONE;
-
-		LLVMValueRef texsize =
-			fix_resinfo(ctx, target,
-				    ac_build_image_opcode(&ctx->ac, &resinfo));
-
-		/* Compute -0.5 / size. */
-		for (unsigned c = 0; c < 2; c++) {
-			half_texel[c] =
-				LLVMBuildExtractElement(builder, texsize,
-							LLVMConstInt(ctx->i32, c, 0), "");
-			half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, "");
-			half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, half_texel[c]);
-			half_texel[c] = LLVMBuildFMul(builder, half_texel[c],
-						      LLVMConstReal(ctx->f32, -0.5), "");
-		}
-
-		if (wa_8888) {
-			lp_build_endif(&if_ctx);
-
-			LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block };
-
-			for (unsigned c = 0; c < 2; c++) {
-				LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 };
-				half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2,
-							     values, bb);
-			}
-		}
-	}
-
-	for (unsigned c = 0; c < 2; c++) {
-		LLVMValueRef tmp;
-		tmp = ac_to_float(&ctx->ac, args->coords[c]);
-		tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
-		args->coords[c] = ac_to_integer(&ctx->ac, tmp);
-	}
-
-	return wa_8888;
-}
-
-/* The second half of the cube texture 8_8_8_8 integer workaround: adjust the
- * result after the gather operation.
- */
-static LLVMValueRef
-si_fix_gather4_integer_result(struct si_shader_context *ctx,
-			   LLVMValueRef result,
-			   enum tgsi_return_type return_type,
-			   LLVMValueRef wa)
-{
-	LLVMBuilderRef builder = ctx->ac.builder;
-
-	assert(return_type == TGSI_RETURN_TYPE_SINT ||
-	       return_type == TGSI_RETURN_TYPE_UINT);
-
-	for (unsigned chan = 0; chan < 4; ++chan) {
-		LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false);
-		LLVMValueRef value;
-		LLVMValueRef wa_value;
-
-		value = LLVMBuildExtractElement(builder, result, chanv, "");
-
-		if (return_type == TGSI_RETURN_TYPE_UINT)
-			wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, "");
-		else
-			wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, "");
-		wa_value = ac_to_float(&ctx->ac, wa_value);
-		value = LLVMBuildSelect(builder, wa, wa_value, value, "");
-
-		result = LLVMBuildInsertElement(builder, result, value, chanv, "");
-	}
-
-	return result;
-}
-
-static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
-				struct lp_build_tgsi_context *bld_base,
-				struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	const struct tgsi_full_instruction *inst = emit_data->inst;
-	unsigned opcode = inst->Instruction.Opcode;
-	unsigned target = inst->Texture.Texture;
-	struct ac_image_args args = {};
-	int ref_pos = tgsi_util_get_shadow_ref_src_index(target);
-	unsigned chan;
-	bool has_offset = inst->Texture.NumOffsets > 0;
-	LLVMValueRef fmask_ptr = NULL;
-
-	tex_fetch_ptrs(bld_base, emit_data, &args.resource, &args.sampler, &fmask_ptr);
-
-	if (target == TGSI_TEXTURE_BUFFER) {
-		LLVMValueRef vindex = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
-		unsigned num_channels =
-			util_last_bit(inst->Dst[0].Register.WriteMask);
-		LLVMValueRef result =
-			ac_build_buffer_load_format(&ctx->ac,
-						    args.resource,
-						    vindex,
-						    ctx->i32_0,
-						    num_channels, 0, true);
-		emit_data->output[emit_data->chan] =
-			ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
-		return;
-	}
-
-	/* Fetch and project texture coordinates */
-	args.coords[3] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_W);
-	for (chan = 0; chan < 3; chan++) {
-		args.coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
-		if (opcode == TGSI_OPCODE_TXP)
-			args.coords[chan] = ac_build_fdiv(&ctx->ac,
-				args.coords[chan], args.coords[3]);
-	}
-
-	if (opcode == TGSI_OPCODE_TXP)
-		args.coords[3] = ctx->ac.f32_1;
-
-	/* Pack offsets. */
-	if (has_offset &&
-	    opcode != TGSI_OPCODE_TXF &&
-	    opcode != TGSI_OPCODE_TXF_LZ) {
-		/* The offsets are six-bit signed integers packed like this:
-		 *   X=[5:0], Y=[13:8], and Z=[21:16].
-		 */
-		LLVMValueRef offset[3], pack;
-
-		assert(inst->Texture.NumOffsets == 1);
-
-		for (chan = 0; chan < 3; chan++) {
-			offset[chan] = lp_build_emit_fetch_texoffset(bld_base, inst, 0, chan);
-			offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
-						    LLVMConstInt(ctx->i32, 0x3f, 0), "");
-			if (chan)
-				offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
-							    LLVMConstInt(ctx->i32, chan*8, 0), "");
-		}
-
-		pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
-		pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
-		args.offset = pack;
-	}
-
-	/* Pack LOD bias value */
-	if (opcode == TGSI_OPCODE_TXB)
-		args.bias = args.coords[3];
-	if (opcode == TGSI_OPCODE_TXB2)
-		args.bias = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
-
-	/* Pack depth comparison value */
-	if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
-		LLVMValueRef z;
-
-		if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
-			z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
-		} else {
-			assert(ref_pos >= 0);
-			z = args.coords[ref_pos];
-		}
-
-		/* Section 8.23.1 (Depth Texture Comparison Mode) of the
-		 * OpenGL 4.5 spec says:
-		 *
-		 *    "If the texture’s internal format indicates a fixed-point
-		 *     depth texture, then D_t and D_ref are clamped to the
-		 *     range [0, 1]; otherwise no clamping is performed."
-		 *
-		 * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
-		 * so the depth comparison value isn't clamped for Z16 and
-		 * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
-		 * an explicitly clamped 32-bit float format.
-		 */
-		if (ctx->screen->info.chip_class >= GFX8 &&
-		    ctx->screen->info.chip_class <= GFX9) {
-			LLVMValueRef upgraded;
-			LLVMValueRef clamped;
-			upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
-							   LLVMConstInt(ctx->i32, 3, false), "");
-			upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
-						 LLVMConstInt(ctx->i32, 29, false), "");
-			upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, "");
-			clamped = ac_build_clamp(&ctx->ac, z);
-			z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, "");
-		}
-
-		args.compare = z;
-	}
-
-	/* Pack user derivatives */
-	if (opcode == TGSI_OPCODE_TXD) {
-		int param, num_src_deriv_channels, num_dst_deriv_channels;
-
-		switch (target) {
-		case TGSI_TEXTURE_3D:
-			num_src_deriv_channels = 3;
-			num_dst_deriv_channels = 3;
-			break;
-		case TGSI_TEXTURE_2D:
-		case TGSI_TEXTURE_SHADOW2D:
-		case TGSI_TEXTURE_RECT:
-		case TGSI_TEXTURE_SHADOWRECT:
-		case TGSI_TEXTURE_2D_ARRAY:
-		case TGSI_TEXTURE_SHADOW2D_ARRAY:
-			num_src_deriv_channels = 2;
-			num_dst_deriv_channels = 2;
-			break;
-		case TGSI_TEXTURE_CUBE:
-		case TGSI_TEXTURE_SHADOWCUBE:
-		case TGSI_TEXTURE_CUBE_ARRAY:
-		case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
-			/* Cube derivatives will be converted to 2D. */
-			num_src_deriv_channels = 3;
-			num_dst_deriv_channels = 3;
-			break;
-		case TGSI_TEXTURE_1D:
-		case TGSI_TEXTURE_SHADOW1D:
-		case TGSI_TEXTURE_1D_ARRAY:
-		case TGSI_TEXTURE_SHADOW1D_ARRAY:
-			num_src_deriv_channels = 1;
-
-			/* 1D textures are allocated and used as 2D on GFX9. */
-			if (ctx->screen->info.chip_class == GFX9) {
-				num_dst_deriv_channels = 2;
-			} else {
-				num_dst_deriv_channels = 1;
-			}
-			break;
-		default:
-			unreachable("invalid target");
-		}
-
-		for (param = 0; param < 2; param++) {
-			for (chan = 0; chan < num_src_deriv_channels; chan++)
-				args.derivs[param * num_dst_deriv_channels + chan] =
-					lp_build_emit_fetch(bld_base, inst, param+1, chan);
-
-			/* Fill in the rest with zeros. */
-			for (chan = num_src_deriv_channels;
-			     chan < num_dst_deriv_channels; chan++)
-				args.derivs[param * num_dst_deriv_channels + chan] =
-					ctx->ac.f32_0;
-		}
-	}
-
-	if (target == TGSI_TEXTURE_CUBE ||
-	    target == TGSI_TEXTURE_CUBE_ARRAY ||
-	    target == TGSI_TEXTURE_SHADOWCUBE ||
-	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
-		ac_prepare_cube_coords(&ctx->ac,
-				       opcode == TGSI_OPCODE_TXD,
-				       target == TGSI_TEXTURE_CUBE_ARRAY ||
-				       target == TGSI_TEXTURE_SHADOWCUBE_ARRAY,
-				       opcode == TGSI_OPCODE_LODQ,
-				       args.coords, args.derivs);
-	} else if (tgsi_is_array_sampler(target) &&
-		   opcode != TGSI_OPCODE_TXF &&
-		   opcode != TGSI_OPCODE_TXF_LZ &&
-		   ctx->screen->info.chip_class <= GFX8) {
-		unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2;
-		args.coords[array_coord] = ac_build_round(&ctx->ac, args.coords[array_coord]);
-	}
-
-	/* 1D textures are allocated and used as 2D on GFX9. */
-	if (ctx->screen->info.chip_class == GFX9) {
-		LLVMValueRef filler;
-
-		/* Use 0.5, so that we don't sample the border color. */
-		if (opcode == TGSI_OPCODE_TXF ||
-		    opcode == TGSI_OPCODE_TXF_LZ)
-			filler = ctx->i32_0;
-		else
-			filler = LLVMConstReal(ctx->f32, 0.5);
-
-		if (target == TGSI_TEXTURE_1D ||
-		    target == TGSI_TEXTURE_SHADOW1D) {
-			args.coords[1] = filler;
-		} else if (target == TGSI_TEXTURE_1D_ARRAY ||
-			   target == TGSI_TEXTURE_SHADOW1D_ARRAY) {
-			args.coords[2] = args.coords[1];
-			args.coords[1] = filler;
-		}
-	}
-
-	/* Pack LOD or sample index */
-	if (opcode == TGSI_OPCODE_TXL)
-		args.lod = args.coords[3];
-	else if (opcode == TGSI_OPCODE_TXL2)
-		args.lod = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
-	else if (opcode == TGSI_OPCODE_TXF) {
-		if (target == TGSI_TEXTURE_2D_MSAA) {
-			/* No LOD, but move sample index into the right place. */
-			args.coords[2] = args.coords[3];
-		} else if (target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
-			args.lod = args.coords[3];
-		}
-	}
-
-	if ((target == TGSI_TEXTURE_2D_MSAA ||
-	     target == TGSI_TEXTURE_2D_ARRAY_MSAA) &&
-	    !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
-		ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords,
-					 target == TGSI_TEXTURE_2D_ARRAY_MSAA);
-	}
-
-	if (opcode == TGSI_OPCODE_TXF ||
-	    opcode == TGSI_OPCODE_TXF_LZ) {
-		/* add tex offsets */
-		if (inst->Texture.NumOffsets) {
-			const struct tgsi_texture_offset *off = inst->TexOffsets;
-
-			assert(inst->Texture.NumOffsets == 1);
-
-			switch (target) {
-			case TGSI_TEXTURE_3D:
-				args.coords[2] =
-					LLVMBuildAdd(ctx->ac.builder, args.coords[2],
-						ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ], "");
-				/* fall through */
-			case TGSI_TEXTURE_2D:
-			case TGSI_TEXTURE_SHADOW2D:
-			case TGSI_TEXTURE_RECT:
-			case TGSI_TEXTURE_SHADOWRECT:
-			case TGSI_TEXTURE_2D_ARRAY:
-			case TGSI_TEXTURE_SHADOW2D_ARRAY:
-				args.coords[1] =
-					LLVMBuildAdd(ctx->ac.builder, args.coords[1],
-						ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY], "");
-				/* fall through */
-			case TGSI_TEXTURE_1D:
-			case TGSI_TEXTURE_SHADOW1D:
-			case TGSI_TEXTURE_1D_ARRAY:
-			case TGSI_TEXTURE_SHADOW1D_ARRAY:
-				args.coords[0] =
-					LLVMBuildAdd(ctx->ac.builder, args.coords[0],
-						ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX], "");
-				break;
-				/* texture offsets do not apply to other texture targets */
-			}
-		}
-	}
-
-	if (opcode == TGSI_OPCODE_TG4) {
-		unsigned gather_comp = 0;
-
-		/* DMASK was repurposed for GATHER4. 4 components are always
-		 * returned and DMASK works like a swizzle - it selects
-		 * the component to fetch. The only valid DMASK values are
-		 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
-		 * (red,red,red,red) etc.) The ISA document doesn't mention
-		 * this.
-		 */
-
-		/* Get the component index from src1.x for Gather4. */
-		if (!tgsi_is_shadow_target(target)) {
-			LLVMValueRef comp_imm;
-			struct tgsi_src_register src1 = inst->Src[1].Register;
-
-			assert(src1.File == TGSI_FILE_IMMEDIATE);
-
-			comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX];
-			gather_comp = LLVMConstIntGetZExtValue(comp_imm);
-			gather_comp = CLAMP(gather_comp, 0, 3);
-		}
-
-		args.dmask = 1 << gather_comp;
-	} else {
-		args.dmask = 0xf;
-	}
-
-	args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
-	args.unorm = target == TGSI_TEXTURE_RECT ||
-		     target == TGSI_TEXTURE_SHADOWRECT;
-	args.opcode = ac_image_sample;
-
-	switch (opcode) {
-	case TGSI_OPCODE_TXF:
-	case TGSI_OPCODE_TXF_LZ:
-		args.opcode = opcode == TGSI_OPCODE_TXF_LZ ||
-			      target == TGSI_TEXTURE_2D_MSAA ||
-			      target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
-				      ac_image_load : ac_image_load_mip;
-		break;
-	case TGSI_OPCODE_LODQ:
-		args.opcode = ac_image_get_lod;
-		break;
-	case TGSI_OPCODE_TEX:
-	case TGSI_OPCODE_TEX2:
-	case TGSI_OPCODE_TXP:
-		if (ctx->type != PIPE_SHADER_FRAGMENT)
-			args.level_zero = true;
-		break;
-	case TGSI_OPCODE_TEX_LZ:
-		args.level_zero = true;
-		break;
-	case TGSI_OPCODE_TXB:
-	case TGSI_OPCODE_TXB2:
-		assert(ctx->type == PIPE_SHADER_FRAGMENT);
-		break;
-	case TGSI_OPCODE_TXL:
-	case TGSI_OPCODE_TXL2:
-		break;
-	case TGSI_OPCODE_TXD:
-		break;
-	case TGSI_OPCODE_TG4:
-		args.opcode = ac_image_gather4;
-		args.level_zero = true;
-		break;
-	default:
-		assert(0);
-		return;
-	}
-
-	/* The hardware needs special lowering for Gather4 with integer formats. */
-	LLVMValueRef gather4_int_result_workaround = NULL;
-
-	if (ctx->screen->info.chip_class <= GFX8 &&
-	    opcode == TGSI_OPCODE_TG4) {
-		assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
-
-		if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
-		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
-			gather4_int_result_workaround =
-				si_lower_gather4_integer(ctx, &args, target,
-							 inst->Texture.ReturnType);
-		}
-	}
-
-	args.attributes = AC_FUNC_ATTR_READNONE;
-	LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args);
-
-	if (gather4_int_result_workaround) {
-		result = si_fix_gather4_integer_result(ctx, result,
-						       inst->Texture.ReturnType,
-						       gather4_int_result_workaround);
-	}
-
-	emit_data->output[emit_data->chan] = result;
-}
-
-static void si_llvm_emit_txqs(
-	const struct lp_build_tgsi_action *action,
-	struct lp_build_tgsi_context *bld_base,
-	struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef rsrc;
-
-	tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL);
-
-	rsrc = LLVMBuildBitCast(ctx->ac.builder, rsrc, ctx->v8i32, "");
-	emit_data->output[emit_data->chan] =
-		ac_build_image_get_sample_count(&ctx->ac, rsrc);
-}
-
-static LLVMValueRef si_llvm_emit_fbfetch(struct si_shader_context *ctx)
-{
-	struct ac_image_args args = {};
-	LLVMValueRef ptr, image, fmask;
-
-	/* Ignore src0, because KHR_blend_func_extended disallows multiple render
-	 * targets.
-	 */
-
-	/* Load the image descriptor. */
-	STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
-	ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
-	ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
-				   ac_array_in_const32_addr_space(ctx->v8i32), "");
-	image = ac_build_load_to_sgpr(&ctx->ac, ptr,
-			LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
-
-	unsigned chan = 0;
-
-	args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
-
-	if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
-		args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
-
-	/* Get the current render target layer index. */
-	if (ctx->shader->key.mono.u.ps.fbfetch_layered)
-		args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
-
-	if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
-		args.coords[chan++] = si_get_sample_id(ctx);
-
-	if (ctx->shader->key.mono.u.ps.fbfetch_msaa &&
-	    !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
-		fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
-			LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
-
-		ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
-					 ctx->shader->key.mono.u.ps.fbfetch_layered);
-	}
-
-	args.opcode = ac_image_load;
-	args.resource = image;
-	args.dmask = 0xf;
-	args.attributes = AC_FUNC_ATTR_READNONE;
-
-	if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
-		args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
-			ac_image_2darraymsaa : ac_image_2dmsaa;
-	else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
-		args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
-			ac_image_1darray : ac_image_1d;
-	else
-		args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
-			ac_image_2darray : ac_image_2d;
-
-	return ac_build_image_opcode(&ctx->ac, &args);
-}
-
-static void si_tgsi_emit_fbfetch(const struct lp_build_tgsi_action *action,
-				 struct lp_build_tgsi_context *bld_base,
-				 struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	emit_data->output[emit_data->chan] = si_llvm_emit_fbfetch(ctx);
-}
-
-LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
-{
-	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-
-	return si_llvm_emit_fbfetch(ctx);
-}
-
-/**
- * Setup actions for TGSI memory opcode, including texture opcodes.
- */
-void si_shader_context_init_mem(struct si_shader_context *ctx)
-{
-	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
-	bld_base->op_actions[TGSI_OPCODE_TEX].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TEX_LZ].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TEX2].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TXB].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TXB2].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TXD].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TXF].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TXF_LZ].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TXL].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TXL2].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TXP].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = resq_emit;
-	bld_base->op_actions[TGSI_OPCODE_TG4].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_LODQ].emit = build_tex_intrinsic;
-	bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
-
-	bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_tgsi_emit_fbfetch;
-
-	bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
-	bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
-	bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
-
-	bld_base->op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add";
-	bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap";
-	bld_base->op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap";
-	bld_base->op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and";
-	bld_base->op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or";
-	bld_base->op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor";
-	bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin";
-	bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax";
-	bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin";
-	bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax";
-	bld_base->op_actions[TGSI_OPCODE_ATOMINC_WRAP].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMINC_WRAP].intr_name = "inc";
-	bld_base->op_actions[TGSI_OPCODE_ATOMDEC_WRAP].emit = atomic_emit;
-	bld_base->op_actions[TGSI_OPCODE_ATOMDEC_WRAP].intr_name = "dec";
-}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
deleted file mode 100644
index 1443432d593..00000000000
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ /dev/null
@@ -1,1165 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "si_shader_internal.h"
-#include "si_pipe.h"
-#include "ac_llvm_util.h"
-#include "util/u_memory.h"
-
-struct si_llvm_diagnostics {
-	struct pipe_debug_callback *debug;
-	unsigned retval;
-};
-
-static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
-{
-	struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
-	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
-	const char *severity_str = NULL;
-
-	switch (severity) {
-	case LLVMDSError:
-		severity_str = "error";
-		break;
-	case LLVMDSWarning:
-		severity_str = "warning";
-		break;
-	case LLVMDSRemark:
-	case LLVMDSNote:
-	default:
-		return;
-	}
-
-	char *description = LLVMGetDiagInfoDescription(di);
-
-	pipe_debug_message(diag->debug, SHADER_INFO,
-			   "LLVM diagnostic (%s): %s", severity_str, description);
-
-	if (severity == LLVMDSError) {
-		diag->retval = 1;
-		fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
-	}
-
-	LLVMDisposeMessage(description);
-}
-
-/**
- * Compile an LLVM module to machine code.
- *
- * @returns 0 for success, 1 for failure
- */
-unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
-			 struct ac_llvm_compiler *compiler,
-			 struct pipe_debug_callback *debug,
-			 bool less_optimized, unsigned wave_size)
-{
-	struct ac_compiler_passes *passes = compiler->passes;
-
-	if (wave_size == 32)
-		passes = compiler->passes_wave32;
-	else if (less_optimized && compiler->low_opt_passes)
-		passes = compiler->low_opt_passes;
-
-	struct si_llvm_diagnostics diag;
-	LLVMContextRef llvm_ctx;
-
-	diag.debug = debug;
-	diag.retval = 0;
-
-	/* Setup Diagnostic Handler*/
-	llvm_ctx = LLVMGetModuleContext(M);
-
-	LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
-
-	/* Compile IR. */
-	if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer,
-				      &binary->elf_size))
-		diag.retval = 1;
-
-	if (diag.retval != 0)
-		pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
-	return diag.retval;
-}
-
-void si_shader_binary_clean(struct si_shader_binary *binary)
-{
-	free((void *)binary->elf_buffer);
-	binary->elf_buffer = NULL;
-
-	free(binary->llvm_ir_string);
-	binary->llvm_ir_string = NULL;
-}
-
-LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
-			  enum tgsi_opcode_type type)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	switch (type) {
-	case TGSI_TYPE_UNSIGNED:
-	case TGSI_TYPE_SIGNED:
-		return ctx->ac.i32;
-	case TGSI_TYPE_UNSIGNED64:
-	case TGSI_TYPE_SIGNED64:
-		return ctx->ac.i64;
-	case TGSI_TYPE_DOUBLE:
-		return ctx->ac.f64;
-	case TGSI_TYPE_UNTYPED:
-	case TGSI_TYPE_FLOAT:
-		return ctx->ac.f32;
-	default: break;
-	}
-	return 0;
-}
-
-LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
-		     enum tgsi_opcode_type type, LLVMValueRef value)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
-
-	if (dst_type)
-		return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
-	else
-		return value;
-}
-
-/**
- * Return a value that is equal to the given i32 \p index if it lies in [0,num)
- * or an undefined value in the same interval otherwise.
- */
-LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
-				 LLVMValueRef index,
-				 unsigned num)
-{
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
-	LLVMValueRef cc;
-
-	if (util_is_power_of_two_or_zero(num)) {
-		index = LLVMBuildAnd(builder, index, c_max, "");
-	} else {
-		/* In theory, this MAX pattern should result in code that is
-		 * as good as the bit-wise AND above.
-		 *
-		 * In practice, LLVM generates worse code (at the time of
-		 * writing), because its value tracking is not strong enough.
-		 */
-		cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
-		index = LLVMBuildSelect(builder, cc, index, c_max, "");
-	}
-
-	return index;
-}
-
-static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
-				 LLVMValueRef value,
-				 unsigned swizzle_x,
-				 unsigned swizzle_y,
-				 unsigned swizzle_z,
-				 unsigned swizzle_w)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef swizzles[4];
-
-	swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
-	swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
-	swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
-	swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);
-
-	return LLVMBuildShuffleVector(ctx->ac.builder,
-				      value,
-				      LLVMGetUndef(LLVMTypeOf(value)),
-				      LLVMConstVector(swizzles, 4), "");
-}
-
-/**
- * Return the description of the array covering the given temporary register
- * index.
- */
-static unsigned
-get_temp_array_id(struct lp_build_tgsi_context *bld_base,
-		  unsigned reg_index,
-		  const struct tgsi_ind_register *reg)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
-	unsigned i;
-
-	if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
-		return reg->ArrayID;
-
-	for (i = 0; i < num_arrays; i++) {
-		const struct tgsi_array_info *array = &ctx->temp_arrays[i];
-
-		if (reg_index >= array->range.First && reg_index <= array->range.Last)
-			return i + 1;
-	}
-
-	return 0;
-}
-
-static struct tgsi_declaration_range
-get_array_range(struct lp_build_tgsi_context *bld_base,
-		unsigned File, unsigned reg_index,
-		const struct tgsi_ind_register *reg)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct tgsi_declaration_range range;
-
-	if (File == TGSI_FILE_TEMPORARY) {
-		unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
-		if (array_id)
-			return ctx->temp_arrays[array_id - 1].range;
-	}
-
-	range.First = 0;
-	range.Last = bld_base->info->file_max[File];
-	return range;
-}
-
-/**
- * For indirect registers, construct a pointer directly to the requested
- * element using getelementptr if possible.
- *
- * Returns NULL if the insertelement/extractelement fallback for array access
- * must be used.
- */
-static LLVMValueRef
-get_pointer_into_array(struct si_shader_context *ctx,
-		       unsigned file,
-		       unsigned swizzle,
-		       unsigned reg_index,
-		       const struct tgsi_ind_register *reg_indirect)
-{
-	unsigned array_id;
-	struct tgsi_array_info *array;
-	LLVMValueRef idxs[2];
-	LLVMValueRef index;
-	LLVMValueRef alloca;
-
-	if (file != TGSI_FILE_TEMPORARY)
-		return NULL;
-
-	array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
-	if (!array_id)
-		return NULL;
-
-	alloca = ctx->temp_array_allocas[array_id - 1];
-	if (!alloca)
-		return NULL;
-
-	array = &ctx->temp_arrays[array_id - 1];
-
-	if (!(array->writemask & (1 << swizzle)))
-		return ctx->undef_alloca;
-
-	index = si_get_indirect_index(ctx, reg_indirect, 1,
-				      reg_index - ctx->temp_arrays[array_id - 1].range.First);
-
-	/* Ensure that the index is within a valid range, to guard against
-	 * VM faults and overwriting critical data (e.g. spilled resource
-	 * descriptors).
-	 *
-	 * TODO It should be possible to avoid the additional instructions
-	 * if LLVM is changed so that it guarantuees:
-	 * 1. the scratch space descriptor isolates the current wave (this
-	 *    could even save the scratch offset SGPR at the cost of an
-	 *    additional SALU instruction)
-	 * 2. the memory for allocas must be allocated at the _end_ of the
-	 *    scratch space (after spilled registers)
-	 */
-	index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
-
-	index = ac_build_imad(&ctx->ac, index,
-			      LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
-			      LLVMConstInt(ctx->i32,
-					   util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0));
-	idxs[0] = ctx->i32_0;
-	idxs[1] = index;
-	return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
-}
-
-LLVMValueRef
-si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
-			 LLVMTypeRef type,
-			 LLVMValueRef ptr,
-			 LLVMValueRef ptr2)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef values[2] = {
-		ac_to_integer(&ctx->ac, ptr),
-		ac_to_integer(&ctx->ac, ptr2),
-	};
-	LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
-	return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
-}
-
-static LLVMValueRef
-emit_array_fetch(struct lp_build_tgsi_context *bld_base,
-		 unsigned File, enum tgsi_opcode_type type,
-		 struct tgsi_declaration_range range,
-		 unsigned swizzle_in)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	unsigned i, size = range.Last - range.First + 1;
-	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
-	LLVMValueRef result = LLVMGetUndef(vec);
-	unsigned swizzle = swizzle_in;
-	struct tgsi_full_src_register tmp_reg = {};
-	tmp_reg.Register.File = File;
-	if (tgsi_type_is_64bit(type))
-		swizzle |= (swizzle_in + 1) << 16;
-
-	for (i = 0; i < size; ++i) {
-		tmp_reg.Register.Index = i + range.First;
-
-		LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
-		result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
-			LLVMConstInt(ctx->i32, i, 0), "array_vector");
-	}
-	return result;
-}
-
-static LLVMValueRef
-load_value_from_array(struct lp_build_tgsi_context *bld_base,
-		      unsigned file,
-		      enum tgsi_opcode_type type,
-		      unsigned swizzle,
-		      unsigned reg_index,
-		      const struct tgsi_ind_register *reg_indirect)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef ptr;
-
-	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
-	if (ptr) {
-		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
-		if (tgsi_type_is_64bit(type)) {
-			LLVMValueRef ptr_hi, val_hi;
-			ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
-			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
-			val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-						       val, val_hi);
-		}
-
-		return val;
-	} else {
-		struct tgsi_declaration_range range =
-			get_array_range(bld_base, file, reg_index, reg_indirect);
-		LLVMValueRef index =
-			si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
-		LLVMValueRef array =
-			emit_array_fetch(bld_base, file, type, range, swizzle);
-		return LLVMBuildExtractElement(builder, array, index, "");
-	}
-}
-
-static void
-store_value_to_array(struct lp_build_tgsi_context *bld_base,
-		     LLVMValueRef value,
-		     unsigned file,
-		     unsigned chan_index,
-		     unsigned reg_index,
-		     const struct tgsi_ind_register *reg_indirect)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef ptr;
-
-	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
-	if (ptr) {
-		LLVMBuildStore(builder, value, ptr);
-	} else {
-		unsigned i, size;
-		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
-		LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
-		LLVMValueRef array =
-			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
-		LLVMValueRef temp_ptr;
-
-		array = LLVMBuildInsertElement(builder, array, value, index, "");
-
-		size = range.Last - range.First + 1;
-		for (i = 0; i < size; ++i) {
-			switch(file) {
-			case TGSI_FILE_OUTPUT:
-				temp_ptr = ctx->outputs[i + range.First][chan_index];
-				break;
-
-			case TGSI_FILE_TEMPORARY:
-				if (range.First + i >= ctx->temps_count)
-					continue;
-				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
-				break;
-
-			default:
-				continue;
-			}
-			value = LLVMBuildExtractElement(builder, array,
-				LLVMConstInt(ctx->i32, i, 0), "");
-			LLVMBuildStore(builder, value, temp_ptr);
-		}
-	}
-}
-
-/* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
- * reload them at each use. This must be true if the shader is using
- * derivatives and KILL, because KILL can leave the WQM and then a lazy
- * input load isn't in the WQM anymore.
- */
-static bool si_preload_fs_inputs(struct si_shader_context *ctx)
-{
-	struct si_shader_selector *sel = ctx->shader->selector;
-
-	return sel->info.uses_derivatives &&
-	       sel->info.uses_kill;
-}
-
-static LLVMValueRef
-get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
-	       unsigned chan)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
-	return ctx->outputs[index][chan];
-}
-
-LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
-				const struct tgsi_full_src_register *reg,
-				enum tgsi_opcode_type type,
-				unsigned swizzle_in)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef result = NULL, ptr, ptr2;
-	unsigned swizzle = swizzle_in & 0xffff;
-
-	if (swizzle_in == ~0) {
-		LLVMValueRef values[TGSI_NUM_CHANNELS];
-		unsigned chan;
-		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-			values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
-		}
-		return ac_build_gather_values(&ctx->ac, values,
-					      TGSI_NUM_CHANNELS);
-	}
-
-	if (reg->Register.Indirect) {
-		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
-				swizzle, reg->Register.Index, &reg->Indirect);
-		return bitcast(bld_base, type, load);
-	}
-
-	switch(reg->Register.File) {
-	case TGSI_FILE_IMMEDIATE: {
-		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
-		if (tgsi_type_is_64bit(type)) {
-			result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
-			result = LLVMConstInsertElement(result,
-							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
-							ctx->i32_0);
-			result = LLVMConstInsertElement(result,
-							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)],
-							ctx->i32_1);
-			return LLVMConstBitCast(result, ctype);
-		} else {
-			return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
-		}
-	}
-
-	case TGSI_FILE_INPUT: {
-		unsigned index = reg->Register.Index;
-		LLVMValueRef input[4];
-
-		/* I don't think doing this for vertex shaders is beneficial.
-		 * For those, we want to make sure the VMEM loads are executed
-		 * only once. Fragment shaders don't care much, because
-		 * v_interp instructions are much cheaper than VMEM loads.
-		 */
-		if (!si_preload_fs_inputs(ctx) &&
-		    ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
-			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
-		else
-			memcpy(input, &ctx->inputs[index * 4], sizeof(input));
-
-		result = input[swizzle];
-
-		if (tgsi_type_is_64bit(type)) {
-			ptr = result;
-			ptr2 = input[swizzle_in >> 16];
-			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-							ptr, ptr2);
-		}
-		break;
-	}
-
-	case TGSI_FILE_TEMPORARY:
-		if (reg->Register.Index >= ctx->temps_count)
-			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
-		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
-		if (tgsi_type_is_64bit(type)) {
-			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)];
-			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-							LLVMBuildLoad(builder, ptr, ""),
-							LLVMBuildLoad(builder, ptr2, ""));
-		}
-		result = LLVMBuildLoad(builder, ptr, "");
-		break;
-
-	case TGSI_FILE_OUTPUT:
-		ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
-		if (tgsi_type_is_64bit(type)) {
-			ptr2 = get_output_ptr(bld_base, reg->Register.Index, (swizzle_in >> 16));
-			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-							LLVMBuildLoad(builder, ptr, ""),
-							LLVMBuildLoad(builder, ptr2, ""));
-		}
-		result = LLVMBuildLoad(builder, ptr, "");
-		break;
-
-	default:
-		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
-	}
-
-	return bitcast(bld_base, type, result);
-}
-
-static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
-				       const struct tgsi_full_src_register *reg,
-				       enum tgsi_opcode_type type,
-				       unsigned swizzle_in)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef cval = ctx->system_values[reg->Register.Index];
-	unsigned swizzle = swizzle_in & 0xffff;
-
-	if (tgsi_type_is_64bit(type)) {
-		LLVMValueRef lo, hi;
-
-		assert(swizzle == 0 || swizzle == 2);
-
-		lo = LLVMBuildExtractElement(
-			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
-		hi = LLVMBuildExtractElement(
-			builder, cval, LLVMConstInt(ctx->i32, (swizzle_in >> 16), 0), "");
-
-		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-						lo, hi);
-	}
-
-	if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
-		cval = LLVMBuildExtractElement(
-			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
-	} else {
-		assert(swizzle == 0);
-	}
-
-	return bitcast(bld_base, type, cval);
-}
-
-static void emit_declaration(struct lp_build_tgsi_context *bld_base,
-			     const struct tgsi_full_declaration *decl)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
-	unsigned first, last, i;
-	switch(decl->Declaration.File) {
-	case TGSI_FILE_ADDRESS:
-	{
-		 unsigned idx;
-		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-			unsigned chan;
-			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-				 ctx->addrs[idx][chan] = ac_build_alloca_undef(
-					&ctx->ac, ctx->i32, "");
-			}
-		}
-		break;
-	}
-
-	case TGSI_FILE_TEMPORARY:
-	{
-		char name[18] = "";
-		LLVMValueRef array_alloca = NULL;
-		unsigned decl_size;
-		unsigned writemask = decl->Declaration.UsageMask;
-		first = decl->Range.First;
-		last = decl->Range.Last;
-		decl_size = 4 * ((last - first) + 1);
-
-		if (decl->Declaration.Array) {
-			unsigned id = decl->Array.ArrayID - 1;
-			unsigned array_size;
-
-			writemask &= ctx->temp_arrays[id].writemask;
-			ctx->temp_arrays[id].writemask = writemask;
-			array_size = ((last - first) + 1) * util_bitcount(writemask);
-
-			/* If the array has more than 16 elements, store it
-			 * in memory using an alloca that spans the entire
-			 * array.
-			 *
-			 * Otherwise, store each array element individually.
-			 * We will then generate vectors (per-channel, up to
-			 * <16 x float> if the usagemask is a single bit) for
-			 * indirect addressing.
-			 *
-			 * Note that 16 is the number of vector elements that
-			 * LLVM will store in a register, so theoretically an
-			 * array with up to 4 * 16 = 64 elements could be
-			 * handled this way, but whether that's a good idea
-			 * depends on VGPR register pressure elsewhere.
-			 *
-			 * FIXME: We shouldn't need to have the non-alloca
-			 * code path for arrays. LLVM should be smart enough to
-			 * promote allocas into registers when profitable.
-			 */
-			if (array_size > 16 ||
-			    !ctx->screen->llvm_has_working_vgpr_indexing) {
-				array_alloca = ac_build_alloca_undef(&ctx->ac,
-					LLVMArrayType(ctx->f32,
-						      array_size), "array");
-				ctx->temp_array_allocas[id] = array_alloca;
-			}
-		}
-
-		if (!ctx->temps_count) {
-			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
-			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
-		}
-		if (!array_alloca) {
-			for (i = 0; i < decl_size; ++i) {
-#ifndef NDEBUG
-				snprintf(name, sizeof(name), "TEMP%d.%c",
-					 first + i / 4, "xyzw"[i % 4]);
-#endif
-				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
-					ac_build_alloca_undef(&ctx->ac,
-							      ctx->f32,
-							      name);
-			}
-		} else {
-			LLVMValueRef idxs[2] = {
-				ctx->i32_0,
-				NULL
-			};
-			unsigned j = 0;
-
-			if (writemask != TGSI_WRITEMASK_XYZW &&
-			    !ctx->undef_alloca) {
-				/* Create a dummy alloca. We use it so that we
-				 * have a pointer that is safe to load from if
-				 * a shader ever reads from a channel that
-				 * it never writes to.
-				 */
-				ctx->undef_alloca = ac_build_alloca_undef(
-					&ctx->ac, ctx->f32, "undef");
-			}
-
-			for (i = 0; i < decl_size; ++i) {
-				LLVMValueRef ptr;
-				if (writemask & (1 << (i % 4))) {
-#ifndef NDEBUG
-					snprintf(name, sizeof(name), "TEMP%d.%c",
-						 first + i / 4, "xyzw"[i % 4]);
-#endif
-					idxs[1] = LLVMConstInt(ctx->i32, j, 0);
-					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
-					j++;
-				} else {
-					ptr = ctx->undef_alloca;
-				}
-				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
-			}
-		}
-		break;
-	}
-	case TGSI_FILE_INPUT:
-	{
-		unsigned idx;
-		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-			if (ctx->load_input &&
-			    ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
-				ctx->input_decls[idx] = *decl;
-				ctx->input_decls[idx].Range.First = idx;
-				ctx->input_decls[idx].Range.Last = idx;
-				ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
-
-				if (si_preload_fs_inputs(ctx) ||
-				    bld_base->info->processor != PIPE_SHADER_FRAGMENT)
-					ctx->load_input(ctx, idx, &ctx->input_decls[idx],
-							&ctx->inputs[idx * 4]);
-			}
-		}
-	}
-	break;
-
-	case TGSI_FILE_SYSTEM_VALUE:
-	{
-		unsigned idx;
-		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-			si_load_system_value(ctx, idx, decl);
-		}
-	}
-	break;
-
-	case TGSI_FILE_OUTPUT:
-	{
-		char name[16] = "";
-		unsigned idx;
-		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-			unsigned chan;
-			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
-			if (ctx->outputs[idx][0])
-				continue;
-			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-#ifndef NDEBUG
-				snprintf(name, sizeof(name), "OUT%d.%c",
-					 idx, "xyzw"[chan % 4]);
-#endif
-				ctx->outputs[idx][chan] = ac_build_alloca_undef(
-					&ctx->ac, ctx->f32, name);
-			}
-		}
-		break;
-	}
-
-	case TGSI_FILE_MEMORY:
-		si_tgsi_declare_compute_memory(ctx, decl);
-		break;
-
-	default:
-		break;
-	}
-}
-
-void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
-			const struct tgsi_full_instruction *inst,
-			const struct tgsi_opcode_info *info,
-			unsigned index,
-			LLVMValueRef dst[4])
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
-	bool is_vec_store = false;
-	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
-
-	if (dst[0]) {
-		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
-		is_vec_store = (k == LLVMVectorTypeKind);
-	}
-
-	if (is_vec_store) {
-		LLVMValueRef values[4] = {};
-		uint32_t writemask = reg->Register.WriteMask;
-		while (writemask) {
-			unsigned chan = u_bit_scan(&writemask);
-			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
-			values[chan]  = LLVMBuildExtractElement(ctx->ac.builder,
-							dst[0], index, "");
-		}
-		bld_base->emit_store(bld_base, inst, info, index, values);
-		return;
-	}
-
-	uint32_t writemask = reg->Register.WriteMask;
-	while (writemask) {
-		unsigned chan_index = u_bit_scan(&writemask);
-		LLVMValueRef value = dst[chan_index];
-
-		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
-			continue;
-		if (inst->Instruction.Saturate)
-			value = ac_build_clamp(&ctx->ac, value);
-
-		if (reg->Register.File == TGSI_FILE_ADDRESS) {
-			temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
-			LLVMBuildStore(builder, value, temp_ptr);
-			continue;
-		}
-
-		if (!tgsi_type_is_64bit(dtype))
-			value = ac_to_float(&ctx->ac, value);
-
-		if (reg->Register.Indirect) {
-			unsigned file = reg->Register.File;
-			unsigned reg_index = reg->Register.Index;
-			store_value_to_array(bld_base, value, file, chan_index,
-					     reg_index, &reg->Indirect);
-		} else {
-			switch(reg->Register.File) {
-			case TGSI_FILE_OUTPUT:
-				temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
-				if (tgsi_type_is_64bit(dtype))
-					temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
-				break;
-
-			case TGSI_FILE_TEMPORARY:
-			{
-				if (reg->Register.Index >= ctx->temps_count)
-					continue;
-
-				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
-				if (tgsi_type_is_64bit(dtype))
-					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
-
-				break;
-			}
-			default:
-				return;
-			}
-			if (!tgsi_type_is_64bit(dtype))
-				LLVMBuildStore(builder, value, temp_ptr);
-			else {
-				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
-								    LLVMVectorType(ctx->i32, 2), "");
-				LLVMValueRef val2;
-				value = LLVMBuildExtractElement(builder, ptr,
-								ctx->i32_0, "");
-				val2 = LLVMBuildExtractElement(builder, ptr,
-							       ctx->i32_1, "");
-
-				LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
-				LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
-			}
-		}
-	}
-}
-
-static int get_line(int pc)
-{
-	/* Subtract 1 so that the number shown is that of the corresponding
-	 * opcode in the TGSI dump, e.g. an if block has the same suffix as
-	 * the instruction number of the corresponding TGSI IF.
-	 */
-	return pc - 1;
-}
-
-static void bgnloop_emit(const struct lp_build_tgsi_action *action,
-			 struct lp_build_tgsi_context *bld_base,
-			 struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void brk_emit(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	ac_build_break(&ctx->ac);
-}
-
-static void cont_emit(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	ac_build_continue(&ctx->ac);
-}
-
-static void else_emit(const struct lp_build_tgsi_action *action,
-		      struct lp_build_tgsi_context *bld_base,
-		      struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	ac_build_else(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void endif_emit(const struct lp_build_tgsi_action *action,
-		       struct lp_build_tgsi_context *bld_base,
-		       struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	ac_build_endif(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void endloop_emit(const struct lp_build_tgsi_action *action,
-			 struct lp_build_tgsi_context *bld_base,
-			 struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void if_emit(const struct lp_build_tgsi_action *action,
-		    struct lp_build_tgsi_context *bld_base,
-		    struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
-}
-
-static void uif_emit(const struct lp_build_tgsi_action *action,
-		     struct lp_build_tgsi_context *bld_base,
-		     struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
-}
-
-static void emit_immediate(struct lp_build_tgsi_context *bld_base,
-			   const struct tgsi_full_immediate *imm)
-{
-	unsigned i;
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-
-	for (i = 0; i < 4; ++i) {
-		ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
-				LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
-	}
-
-	ctx->imms_num++;
-}
-
-void si_llvm_context_init(struct si_shader_context *ctx,
-			  struct si_screen *sscreen,
-			  struct ac_llvm_compiler *compiler,
-			  unsigned wave_size,
-			  unsigned ballot_mask_bits)
-{
-	struct lp_type type;
-
-	/* Initialize the gallivm object:
-	 * We are only using the module, context, and builder fields of this struct.
-	 * This should be enough for us to be able to pass our gallivm struct to the
-	 * helper functions in the gallivm module.
-	 */
-	memset(ctx, 0, sizeof(*ctx));
-	ctx->screen = sscreen;
-	ctx->compiler = compiler;
-
-	ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
-			     sscreen->info.family,
-			     AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
-			     wave_size, ballot_mask_bits);
-
-	ctx->gallivm.context = ctx->ac.context;
-	ctx->gallivm.module = ctx->ac.module;
-	ctx->gallivm.builder = ctx->ac.builder;
-
-	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
-	type.floating = true;
-	type.fixed = false;
-	type.sign = true;
-	type.norm = false;
-	type.width = 32;
-	type.length = 1;
-
-	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
-	lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
-	lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
-	type.width *= 2;
-	lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
-	lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
-	lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
-
-	bld_base->soa = 1;
-	bld_base->emit_swizzle = emit_swizzle;
-	bld_base->emit_declaration = emit_declaration;
-	bld_base->emit_immediate = emit_immediate;
-
-	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
-	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
-	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
-	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
-	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
-	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
-	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
-	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
-
-	si_shader_context_init_alu(ctx);
-	si_shader_context_init_mem(ctx);
-
-	ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
-	ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
-	ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
-	ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
-	ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
-	ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
-	ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
-	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
-	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
-	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
-	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
-
-	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
-	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
-	ctx->i1false = LLVMConstInt(ctx->i1, 0, 0);
-	ctx->i1true = LLVMConstInt(ctx->i1, 1, 0);
-}
-
-/* Set the context to a certain TGSI shader. Can be called repeatedly
- * to change the shader. */
-void si_llvm_context_set_ir(struct si_shader_context *ctx,
-			    struct si_shader *shader,
-			    struct nir_shader *nir)
-{
-	struct si_shader_selector *sel = shader->selector;
-	const struct tgsi_shader_info *info = &sel->info;
-
-	ctx->shader = shader;
-	ctx->type = sel->type;
-	ctx->bld_base.info = info;
-
-	/* Clean up the old contents. */
-	FREE(ctx->temp_arrays);
-	ctx->temp_arrays = NULL;
-	FREE(ctx->temp_array_allocas);
-	ctx->temp_array_allocas = NULL;
-
-	FREE(ctx->imms);
-	ctx->imms = NULL;
-	ctx->imms_num = 0;
-
-	FREE(ctx->temps);
-	ctx->temps = NULL;
-	ctx->temps_count = 0;
-
-	ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
-	ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
-
-	ctx->num_samplers = util_last_bit(info->samplers_declared);
-	ctx->num_images = util_last_bit(info->images_declared);
-
-	if (nir)
-		return;
-
-	if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
-		int size = info->array_max[TGSI_FILE_TEMPORARY];
-
-		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
-		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
-
-		tgsi_scan_arrays(sel->tokens, TGSI_FILE_TEMPORARY, size,
-				 ctx->temp_arrays);
-	}
-	if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
-		int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
-		ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
-	}
-
-	/* Re-set these to start with a clean slate. */
-	ctx->bld_base.num_instructions = 0;
-	ctx->bld_base.pc = 0;
-	memset(ctx->input_decls, 0, sizeof(ctx->input_decls));
-	memset(ctx->inputs, 0, sizeof(ctx->inputs));
-	memset(ctx->outputs, 0, sizeof(ctx->outputs));
-
-	ctx->bld_base.emit_store = si_llvm_emit_store;
-	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
-	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
-	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
-	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
-	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
-}
-
-void si_llvm_create_func(struct si_shader_context *ctx,
-			 const char *name,
-			 LLVMTypeRef *return_types, unsigned num_return_elems)
-{
-	LLVMTypeRef ret_type;
-	enum ac_llvm_calling_convention call_conv;
-	enum pipe_shader_type real_shader_type;
-
-	if (num_return_elems)
-		ret_type = LLVMStructTypeInContext(ctx->ac.context,
-						   return_types,
-						   num_return_elems, true);
-	else
-		ret_type = ctx->voidt;
-
-	real_shader_type = ctx->type;
-
-	/* LS is merged into HS (TCS), and ES is merged into GS. */
-	if (ctx->screen->info.chip_class >= GFX9) {
-		if (ctx->shader->key.as_ls)
-			real_shader_type = PIPE_SHADER_TESS_CTRL;
-		else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
-			real_shader_type = PIPE_SHADER_GEOMETRY;
-	}
-
-	switch (real_shader_type) {
-	case PIPE_SHADER_VERTEX:
-	case PIPE_SHADER_TESS_EVAL:
-		call_conv = AC_LLVM_AMDGPU_VS;
-		break;
-	case PIPE_SHADER_TESS_CTRL:
-		call_conv = AC_LLVM_AMDGPU_HS;
-		break;
-	case PIPE_SHADER_GEOMETRY:
-		call_conv = AC_LLVM_AMDGPU_GS;
-		break;
-	case PIPE_SHADER_FRAGMENT:
-		call_conv = AC_LLVM_AMDGPU_PS;
-		break;
-	case PIPE_SHADER_COMPUTE:
-		call_conv = AC_LLVM_AMDGPU_CS;
-		break;
-	default:
-		unreachable("Unhandle shader type");
-	}
-
-	/* Setup the function */
-	ctx->return_type = ret_type;
-	ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name,
-				     ret_type, ctx->gallivm.module);
-}
-
-void si_llvm_optimize_module(struct si_shader_context *ctx)
-{
-	/* Dump LLVM IR before any optimization passes */
-	if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
-	    si_can_dump_shader(ctx->screen, ctx->type))
-		LLVMDumpModule(ctx->gallivm.module);
-
-	/* Run the pass */
-	LLVMRunPassManager(ctx->compiler->passmgr, ctx->gallivm.module);
-	LLVMDisposeBuilder(ctx->ac.builder);
-}
-
-void si_llvm_dispose(struct si_shader_context *ctx)
-{
-	LLVMDisposeModule(ctx->gallivm.module);
-	LLVMContextDispose(ctx->gallivm.context);
-	FREE(ctx->temp_arrays);
-	ctx->temp_arrays = NULL;
-	FREE(ctx->temp_array_allocas);
-	ctx->temp_array_allocas = NULL;
-	FREE(ctx->temps);
-	ctx->temps = NULL;
-	ctx->temps_count = 0;
-	FREE(ctx->imms);
-	ctx->imms = NULL;
-	ctx->imms_num = 0;
-	ac_llvm_context_dispose(&ctx->ac);
-}
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 941a397525e..bf4a22de13b 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -27,7 +27,6 @@
 
 #include "compiler/nir/nir_serialize.h"
 #include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_parse.h"
 #include "util/hash_table.h"
 #include "util/crc32.h"
 #include "util/u_async_debug.h"
@@ -51,11 +50,7 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
 	unsigned ir_size;
 	void *ir_binary;
 
-	if (sel->tokens) {
-		ir_binary = sel->tokens;
-		ir_size = tgsi_num_tokens(sel->tokens) *
-			  sizeof(struct tgsi_token);
-	} else if (sel->nir_binary) {
+	if (sel->nir_binary) {
 		ir_binary = sel->nir_binary;
 		ir_size = sel->nir_size;
 	} else {
@@ -2153,7 +2148,7 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
 		main_part->key.as_ngg = key->as_ngg;
 		main_part->is_monolithic = false;
 
-		if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
+		if (si_compile_shader(sscreen, compiler_state->compiler,
 					   main_part, &compiler_state->debug) != 0) {
 			FREE(main_part);
 			return false;
@@ -2516,7 +2511,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
 		     sel->type == PIPE_SHADER_GEOMETRY))
 			shader->key.as_ngg = 1;
 
-		if (sel->tokens || sel->nir) {
+		if (sel->nir) {
 			si_get_ir_cache_key(sel, shader->key.as_ngg,
 					    shader->key.as_es, ir_sha1_cache_key);
 		}
@@ -2531,7 +2526,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
 			simple_mtx_unlock(&sscreen->shader_cache_mutex);
 
 			/* Compile the shader if it hasn't been loaded from the cache. */
-			if (si_compile_tgsi_shader(sscreen, compiler, shader,
+			if (si_compile_shader(sscreen, compiler, shader,
 						   debug) != 0) {
 				FREE(shader);
 				fprintf(stderr, "radeonsi: can't compile a main shader part\n");
@@ -2695,44 +2690,17 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 
 	sel->so = state->stream_output;
 
-	if (state->type == PIPE_SHADER_IR_TGSI &&
-	    !sscreen->options.enable_nir) {
-		sel->tokens = tgsi_dup_tokens(state->tokens);
-		if (!sel->tokens) {
-			FREE(sel);
-			return NULL;
-		}
-
-		tgsi_scan_shader(state->tokens, &sel->info);
-		tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info);
-
-		/* Fixup for TGSI: Set which opcode uses which (i,j) pair. */
-		if (sel->info.uses_persp_opcode_interp_centroid)
-			sel->info.uses_persp_centroid = true;
-
-		if (sel->info.uses_linear_opcode_interp_centroid)
-			sel->info.uses_linear_centroid = true;
-
-		if (sel->info.uses_persp_opcode_interp_offset ||
-		    sel->info.uses_persp_opcode_interp_sample)
-			sel->info.uses_persp_center = true;
-
-		if (sel->info.uses_linear_opcode_interp_offset ||
-		    sel->info.uses_linear_opcode_interp_sample)
-			sel->info.uses_linear_center = true;
+	if (state->type == PIPE_SHADER_IR_TGSI) {
+		sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
 	} else {
-		if (state->type == PIPE_SHADER_IR_TGSI) {
-			sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
-		} else {
-			assert(state->type == PIPE_SHADER_IR_NIR);
-			sel->nir = state->ir.nir;
-		}
-
-		si_nir_scan_shader(sel->nir, &sel->info);
-		si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
-		si_nir_adjust_driver_locations(sel->nir);
+		assert(state->type == PIPE_SHADER_IR_NIR);
+		sel->nir = state->ir.nir;
 	}
 
+	si_nir_scan_shader(sel->nir, &sel->info);
+	si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
+	si_nir_adjust_driver_locations(sel->nir);
+
 	sel->type = sel->info.processor;
 	p_atomic_inc(&sscreen->num_shaders_created);
 	si_get_active_slot_masks(&sel->info,
@@ -3304,7 +3272,6 @@ void si_destroy_shader_selector(struct si_context *sctx,
 
 	util_queue_fence_destroy(&sel->ready);
 	simple_mtx_destroy(&sel->mutex);
-	free(sel->tokens);
 	ralloc_free(sel->nir);
 	free(sel->nir_binary);
 	free(sel);
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index ffec8770823..e5f333942b7 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -552,12 +552,6 @@ TODO: document the other workarounds.
         <application name="Rocket League" executable="RocketLeague">
             <option name="radeonsi_zerovram" value="true" />
         </application>
-        <application name="Civilization 6" executable="Civ6">
-            <option name="radeonsi_enable_nir" value="true"/>
-        </application>
-        <application name="Civilization 6" executable="Civ6Sub">
-            <option name="radeonsi_enable_nir" value="true"/>
-        </application>
         <application name="DiRT Rally" executable="DirtRally">
             <option name="radeonsi_prim_restart_tri_strips_only" value="true"/>
         </application>
-- 
2.30.2