radeonsi: remove TGSI
author Marek Olšák <marek.olsak@amd.com>
Sat, 4 Jan 2020 04:15:27 +0000 (23:15 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 6 Jan 2020 20:57:20 +0000 (15:57 -0500)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
16 files changed:
src/gallium/drivers/radeonsi/Makefile.sources
src/gallium/drivers/radeonsi/meson.build
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_debug_options.h
src/gallium/drivers/radeonsi/si_get.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_shader_internal.h
src/gallium/drivers/radeonsi/si_shader_llvm.c [new file with mode: 0644]
src/gallium/drivers/radeonsi/si_shader_llvm_build.c [new file with mode: 0644]
src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c [deleted file]
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c [deleted file]
src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c [deleted file]
src/gallium/drivers/radeonsi/si_state_shaders.c
src/util/00-mesa-defaults.conf

index 886aaf6fa3415e54217a01d66ba972060129e173..5d658b744d09e1884f14f20216b9393ad9ee10ea 100644 (file)
@@ -35,10 +35,9 @@ C_SOURCES := \
        si_shader.c \
        si_shader.h \
        si_shader_internal.h \
+       si_shader_llvm.c \
+       si_shader_llvm_build.c \
        si_shader_nir.c \
-       si_shader_tgsi_alu.c \
-       si_shader_tgsi_mem.c \
-       si_shader_tgsi_setup.c \
        si_shaderlib_tgsi.c \
        si_state.c \
        si_state_binning.c \
index d2d3dd684b0cc5516891c0f5781177c4f355b955..a0bd10f6ac945d2afc0546bee7115d2516144c82 100644 (file)
@@ -50,10 +50,9 @@ files_libradeonsi = files(
   'si_shader.c',
   'si_shader.h',
   'si_shader_internal.h',
+  'si_shader_llvm.c',
+  'si_shader_llvm_build.c',
   'si_shader_nir.c',
-  'si_shader_tgsi_alu.c',
-  'si_shader_tgsi_mem.c',
-  'si_shader_tgsi_setup.c',
   'si_shaderlib_tgsi.c',
   'si_state.c',
   'si_state.h',
index 7abea1927cdbb0de73c4723ec51a783dcae34cb4..f264b880d291defd784bc5ea76af7a172bf8ec0a 100644 (file)
@@ -24,7 +24,6 @@
  */
 
 #include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_parse.h"
 #include "util/u_async_debug.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
@@ -124,13 +123,8 @@ static void si_create_compute_state_async(void *job, int thread_index)
        if (!compiler->passes)
                si_init_compiler(sscreen, compiler);
 
-       if (program->ir_type == PIPE_SHADER_IR_TGSI) {
-               tgsi_scan_shader(sel->tokens, &sel->info);
-       } else {
-               assert(program->ir_type == PIPE_SHADER_IR_NIR);
-
-               si_nir_scan_shader(sel->nir, &sel->info);
-       }
+       assert(program->ir_type == PIPE_SHADER_IR_NIR);
+       si_nir_scan_shader(sel->nir, &sel->info);
 
        /* Store the declared LDS size into tgsi_shader_info for the shader
         * cache to include it.
@@ -167,9 +161,6 @@ static void si_create_compute_state_async(void *job, int thread_index)
 
                if (!si_shader_create(sscreen, compiler, &program->shader, debug)) {
                        program->shader.compilation_failed = true;
-
-                       if (program->ir_type == PIPE_SHADER_IR_TGSI)
-                               FREE(sel->tokens);
                        return;
                }
 
@@ -209,8 +200,6 @@ static void si_create_compute_state_async(void *job, int thread_index)
                simple_mtx_unlock(&sscreen->shader_cache_mutex);
        }
 
-       FREE(sel->tokens);
-       sel->tokens = NULL;
        ralloc_free(sel->nir);
        sel->nir = NULL;
 }
@@ -234,16 +223,9 @@ static void *si_create_compute_state(
        program->input_size = cso->req_input_mem;
 
        if (cso->ir_type != PIPE_SHADER_IR_NATIVE) {
-               if (sscreen->options.enable_nir &&
-                   cso->ir_type == PIPE_SHADER_IR_TGSI) {
+               if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
                        program->ir_type = PIPE_SHADER_IR_NIR;
                        sel->nir = tgsi_to_nir(cso->prog, ctx->screen);
-               } else if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
-                       sel->tokens = tgsi_dup_tokens(cso->prog);
-                       if (!sel->tokens) {
-                               FREE(program);
-                               return NULL;
-                       }
                } else {
                        assert(cso->ir_type == PIPE_SHADER_IR_NIR);
                        sel->nir = (struct nir_shader *) cso->prog;
@@ -719,8 +701,8 @@ static bool si_upload_compute_input(struct si_context *sctx,
        return true;
 }
 
-static void si_setup_tgsi_user_data(struct si_context *sctx,
-                                const struct pipe_grid_info *info)
+static void si_setup_nir_user_data(struct si_context *sctx,
+                                  const struct pipe_grid_info *info)
 {
        struct si_compute *program = sctx->cs_shader_state.program;
        struct si_shader_selector *sel = &program->sel;
@@ -944,7 +926,7 @@ static void si_launch_grid(
        }
 
        if (program->ir_type != PIPE_SHADER_IR_NATIVE)
-               si_setup_tgsi_user_data(sctx, info);
+               si_setup_nir_user_data(sctx, info);
 
        si_emit_dispatch_packets(sctx, info);
 
@@ -977,7 +959,6 @@ void si_destroy_compute(struct si_compute *program)
        FREE(program->global_buffers);
 
        si_shader_destroy(&program->shader);
-       FREE(program->sel.tokens);
        ralloc_free(program->sel.nir);
        FREE(program);
 }
index 7ba835acf847d29707db45b117926d98e4b92719..9a0dd0c9f78bb9c522564c972e3bb58c9a4f06ae 100644 (file)
@@ -1,5 +1,4 @@
 OPT_BOOL(clear_db_cache_before_clear, false, "Clear DB cache before fast depth clear")
-OPT_BOOL(enable_nir, true, "Enable NIR")
 OPT_BOOL(aux_debug, false, "Generate ddebug_dumps for the auxiliary context")
 OPT_BOOL(sync_compile, false, "Always compile synchronously (will cause stalls)")
 OPT_BOOL(dump_shader_binary, false, "Dump shader binary as part of ddebug_dumps")
index c34c8649bcfc394c7e22a03999df8e09f8f1b5f5..1adbafda53a6048d9fcef9ac5abb64f587354504 100644 (file)
@@ -159,6 +159,9 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_IMAGE_LOAD_FORMATTED:
        case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA:
        case PIPE_CAP_TGSI_DIV:
+       case PIPE_CAP_PACKED_UNIFORMS:
+       case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
+       case PIPE_CAP_GL_SPIRV:
                return 1;
 
        case PIPE_CAP_QUERY_SO_OVERFLOW:
@@ -195,7 +198,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
                if (!sscreen->info.has_indirect_compute_dispatch)
                        return 420;
-               return sscreen->options.enable_nir ? 460 : 450;
+               return 460;
 
        case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
                /* Optimal number for good TexSubImage performance on Polaris10. */
@@ -214,15 +217,6 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
                return sscreen->info.has_sparse_vm_mappings ?
                                RADEON_SPARSE_PAGE_SIZE : 0;
 
-       case PIPE_CAP_PACKED_UNIFORMS:
-       case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
-       case PIPE_CAP_GL_SPIRV:
-               return sscreen->options.enable_nir;
-
-       case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
-               if (sscreen->options.enable_nir)
-                       return 0;
-               return 1;
 
        /* Unsupported features. */
        case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
@@ -246,6 +240,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
        case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS:
        case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
+       case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
                return 0;
 
        case PIPE_CAP_FENCE_SIGNAL:
@@ -395,14 +390,14 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
                        int ir = 1 << PIPE_SHADER_IR_NATIVE;
 
                        if (sscreen->info.has_indirect_compute_dispatch)
-                               ir |= 1 << PIPE_SHADER_IR_TGSI;
+                               ir |= 1 << PIPE_SHADER_IR_NIR;
 
                        return ir;
                }
 
                case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
                        uint64_t max_const_buffer_size;
-                       pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
+                       pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_NIR,
                                PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
                                &max_const_buffer_size);
                        return MIN2(max_const_buffer_size, INT_MAX);
@@ -444,13 +439,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
        case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
                return SI_NUM_IMAGES;
        case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
-               if (sscreen->options.enable_nir)
-                       return 0;
-               return 32;
+               return 0;
        case PIPE_SHADER_CAP_PREFERRED_IR:
-               if (sscreen->options.enable_nir)
-                       return PIPE_SHADER_IR_NIR;
-               return PIPE_SHADER_IR_TGSI;
+               return PIPE_SHADER_IR_NIR;
        case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
                return 4;
 
index 2e3232d1cf0920d033bba759269d132271914276..755c768fb0b5894de999b662338abfce94dec25b 100644 (file)
@@ -660,7 +660,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        }
 
        uint64_t max_threads_per_block;
-       screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
+       screen->get_compute_param(screen, PIPE_SHADER_IR_NIR,
                                  PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
                                  &max_threads_per_block);
 
@@ -910,10 +910,6 @@ static void si_disk_cache_create(struct si_screen *sscreen)
        /* These flags affect shader compilation. */
        #define ALL_FLAGS (DBG(SI_SCHED) | DBG(GISEL))
        uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS;
-       /* Reserve left-most bit for tgsi/nir selector */
-       assert(!(shader_debug_flags & (1u << 31)));
-       shader_debug_flags |= (uint32_t)
-               ((sscreen->options.enable_nir & 0x1) << 31);
 
        /* Add the high bits of 32-bit addresses, which affects
         * how 32-bit addresses are expanded to 64 bits.
index e6678e026cd4691b730c0091d9e0e5f78ae10d09..65a070b4570ea72a370b9fc65e51561ad3be6af2 100644 (file)
 #include <llvm/Config/llvm-config.h>
 
 #include "util/u_memory.h"
-#include "util/u_string.h"
-#include "tgsi/tgsi_build.h"
 #include "tgsi/tgsi_strings.h"
-#include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_from_mesa.h"
 
-#include "ac_binary.h"
 #include "ac_exp_param.h"
 #include "ac_shader_util.h"
 #include "ac_rtld.h"
@@ -50,15 +45,7 @@ static const char scratch_rsrc_dword0_symbol[] =
 static const char scratch_rsrc_dword1_symbol[] =
        "SCRATCH_RSRC_DWORD1";
 
-static void si_init_shader_ctx(struct si_shader_context *ctx,
-                              struct si_screen *sscreen,
-                              struct ac_llvm_compiler *compiler,
-                              unsigned wave_size,
-                              bool nir);
-
-static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
-                                struct lp_build_tgsi_context *bld_base,
-                                struct lp_build_emit_data *emit_data);
+static void si_llvm_emit_barrier(struct si_shader_context *ctx);
 
 static void si_dump_shader_key(const struct si_shader *shader, FILE *f);
 
@@ -596,15 +583,6 @@ void si_llvm_load_input_vs(
                out[i] = ac_to_float(&ctx->ac, fetches[i]);
 }
 
-static void declare_input_vs(
-       struct si_shader_context *ctx,
-       unsigned input_index,
-       const struct tgsi_full_declaration *decl,
-       LLVMValueRef out[4])
-{
-       si_llvm_load_input_vs(ctx, input_index, out);
-}
-
 LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
                                 unsigned swizzle)
 {
@@ -626,53 +604,6 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
        }
 }
 
-/**
- * Return the value of tgsi_ind_register for indexing.
- * This is the indirect index with the constant offset added to it.
- */
-LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
-                                  const struct tgsi_ind_register *ind,
-                                  unsigned addr_mul,
-                                  int rel_index)
-{
-       LLVMValueRef result;
-
-       if (ind->File == TGSI_FILE_ADDRESS) {
-               result = ctx->addrs[ind->Index][ind->Swizzle];
-               result = LLVMBuildLoad(ctx->ac.builder, result, "");
-       } else {
-               struct tgsi_full_src_register src = {};
-
-               src.Register.File = ind->File;
-               src.Register.Index = ind->Index;
-
-               /* Set the second index to 0 for constants. */
-               if (ind->File == TGSI_FILE_CONSTANT)
-                       src.Register.Dimension = 1;
-
-               result = ctx->bld_base.emit_fetch_funcs[ind->File](&ctx->bld_base, &src,
-                                                                  TGSI_TYPE_SIGNED,
-                                                                  ind->Swizzle);
-               result = ac_to_integer(&ctx->ac, result);
-       }
-
-       return ac_build_imad(&ctx->ac, result, LLVMConstInt(ctx->i32, addr_mul, 0),
-                            LLVMConstInt(ctx->i32, rel_index, 0));
-}
-
-/**
- * Like si_get_indirect_index, but restricts the return value to a (possibly
- * undefined) value inside [0..num).
- */
-LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
-                                          const struct tgsi_ind_register *ind,
-                                          int rel_index, unsigned num)
-{
-       LLVMValueRef result = si_get_indirect_index(ctx, ind, 1, rel_index);
-
-       return si_llvm_bound_index(ctx, result, num);
-}
-
 static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context *ctx,
                                                        LLVMValueRef vertex_dw_stride,
                                                        LLVMValueRef base_addr,
@@ -701,78 +632,6 @@ static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context
                            LLVMConstInt(ctx->i32, param * 4, 0), "");
 }
 
-/**
- * Calculate a dword address given an input or output register and a stride.
- */
-static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
-                                  const struct tgsi_full_dst_register *dst,
-                                  const struct tgsi_full_src_register *src,
-                                  LLVMValueRef vertex_dw_stride,
-                                  LLVMValueRef base_addr)
-{
-       struct tgsi_shader_info *info = &ctx->shader->selector->info;
-       ubyte *name, *index, *array_first;
-       int input_index;
-       struct tgsi_full_dst_register reg;
-       LLVMValueRef vertex_index = NULL;
-       LLVMValueRef ind_index = NULL;
-
-       /* Set the register description. The address computation is the same
-        * for sources and destinations. */
-       if (src) {
-               reg.Register.File = src->Register.File;
-               reg.Register.Index = src->Register.Index;
-               reg.Register.Indirect = src->Register.Indirect;
-               reg.Register.Dimension = src->Register.Dimension;
-               reg.Indirect = src->Indirect;
-               reg.Dimension = src->Dimension;
-               reg.DimIndirect = src->DimIndirect;
-       } else
-               reg = *dst;
-
-       /* If the register is 2-dimensional (e.g. an array of vertices
-        * in a primitive), calculate the base address of the vertex. */
-       if (reg.Register.Dimension) {
-               if (reg.Dimension.Indirect)
-                       vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect,
-                                                     1, reg.Dimension.Index);
-               else
-                       vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
-       }
-
-       /* Get information about the register. */
-       if (reg.Register.File == TGSI_FILE_INPUT) {
-               name = info->input_semantic_name;
-               index = info->input_semantic_index;
-               array_first = info->input_array_first;
-       } else if (reg.Register.File == TGSI_FILE_OUTPUT) {
-               name = info->output_semantic_name;
-               index = info->output_semantic_index;
-               array_first = info->output_array_first;
-       } else {
-               assert(0);
-               return NULL;
-       }
-
-       if (reg.Register.Indirect) {
-               /* Add the relative address of the element. */
-               if (reg.Indirect.ArrayID)
-                       input_index = array_first[reg.Indirect.ArrayID];
-               else
-                       input_index = reg.Register.Index;
-
-               ind_index = si_get_indirect_index(ctx, &reg.Indirect,
-                                                 1, reg.Register.Index - input_index);
-       } else {
-               input_index = reg.Register.Index;
-       }
-
-       return get_dw_address_from_generic_indices(ctx, vertex_dw_stride,
-                                                  base_addr, vertex_index,
-                                                  ind_index, name[input_index],
-                                                  index[input_index]);
-}
-
 /* The offchip buffer layout for TCS->TES is
  *
  * - attribute 0 of patch 0 vertex 0
@@ -854,65 +713,24 @@ static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(
                                          vertex_index, param_index);
 }
 
-static LLVMValueRef get_tcs_tes_buffer_address_from_reg(
-                                       struct si_shader_context *ctx,
-                                       const struct tgsi_full_dst_register *dst,
-                                       const struct tgsi_full_src_register *src)
+static LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx,
+                                         LLVMTypeRef type,
+                                         LLVMValueRef val1,
+                                         LLVMValueRef val2)
 {
-       struct tgsi_shader_info *info = &ctx->shader->selector->info;
-       ubyte *name, *index, *array_first;
-       struct tgsi_full_src_register reg;
-       LLVMValueRef vertex_index = NULL;
-       LLVMValueRef param_index = NULL;
-       unsigned param_base;
-
-       reg = src ? *src : tgsi_full_src_register_from_dst(dst);
-
-       if (reg.Register.Dimension) {
-               if (reg.Dimension.Indirect)
-                       vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect,
-                                                            1, reg.Dimension.Index);
-               else
-                       vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
-       }
-
-       /* Get information about the register. */
-       if (reg.Register.File == TGSI_FILE_INPUT) {
-               name = info->input_semantic_name;
-               index = info->input_semantic_index;
-               array_first = info->input_array_first;
-       } else if (reg.Register.File == TGSI_FILE_OUTPUT) {
-               name = info->output_semantic_name;
-               index = info->output_semantic_index;
-               array_first = info->output_array_first;
-       } else {
-               assert(0);
-               return NULL;
-       }
-
-       if (reg.Register.Indirect) {
-               if (reg.Indirect.ArrayID)
-                       param_base = array_first[reg.Indirect.ArrayID];
-               else
-                       param_base = reg.Register.Index;
-
-               param_index = si_get_indirect_index(ctx, &reg.Indirect,
-                                                   1, reg.Register.Index - param_base);
-       } else {
-               param_base = reg.Register.Index;
-       }
-
-       return get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
-                                                              param_index, name[param_base],
-                                                              index[param_base]);
+       LLVMValueRef values[2] = {
+               ac_to_integer(&ctx->ac, val1),
+               ac_to_integer(&ctx->ac, val2),
+       };
+       LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
+       return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
 }
 
-static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
+static LLVMValueRef buffer_load(struct si_shader_context *ctx,
                                 LLVMTypeRef type, unsigned swizzle,
                                 LLVMValueRef buffer, LLVMValueRef offset,
                                 LLVMValueRef base, bool can_speculate)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMValueRef value, value2;
        LLVMTypeRef vec_type = LLVMVectorType(type, 4);
 
@@ -938,7 +756,7 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
        value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
                                   swizzle * 4 + 4, ac_glc, can_speculate, false);
 
-       return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
+       return si_build_gather_64bit(ctx, type, value, value2);
 }
 
 /**
@@ -948,30 +766,28 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
  * \param swizzle      offset (typically 0..3); it can be ~0, which loads a vec4
  * \param dw_addr      address in dwords
  */
-static LLVMValueRef lshs_lds_load(struct lp_build_tgsi_context *bld_base,
-                            LLVMTypeRef type, unsigned swizzle,
-                            LLVMValueRef dw_addr)
+static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx,
+                                 LLVMTypeRef type, unsigned swizzle,
+                                 LLVMValueRef dw_addr)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMValueRef value;
 
        if (swizzle == ~0) {
-               LLVMValueRef values[TGSI_NUM_CHANNELS];
+               LLVMValueRef values[4];
 
-               for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
-                       values[chan] = lshs_lds_load(bld_base, type, chan, dw_addr);
+               for (unsigned chan = 0; chan < 4; chan++)
+                       values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
 
-               return ac_build_gather_values(&ctx->ac, values,
-                                             TGSI_NUM_CHANNELS);
+               return ac_build_gather_values(&ctx->ac, values, 4);
        }
 
        /* Split 64-bit loads. */
        if (llvm_type_is_64bit(ctx, type)) {
                LLVMValueRef lo, hi;
 
-               lo = lshs_lds_load(bld_base, ctx->i32, swizzle, dw_addr);
-               hi = lshs_lds_load(bld_base, ctx->i32, swizzle + 1, dw_addr);
-               return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
+               lo = lshs_lds_load(ctx, ctx->i32, swizzle, dw_addr);
+               hi = lshs_lds_load(ctx, ctx->i32, swizzle + 1, dw_addr);
+               return si_build_gather_64bit(ctx, type, lo, hi);
        }
 
        dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
@@ -1049,21 +865,6 @@ static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx,
        return ac_build_gather_values(&ctx->ac, desc, 4);
 }
 
-static LLVMValueRef fetch_input_tcs(
-       struct lp_build_tgsi_context *bld_base,
-       const struct tgsi_full_src_register *reg,
-       enum tgsi_opcode_type type, unsigned swizzle_in)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef dw_addr, stride;
-       unsigned swizzle = swizzle_in & 0xffff;
-       stride = get_tcs_in_vertex_dw_stride(ctx);
-       dw_addr = get_tcs_in_current_patch_offset(ctx);
-       dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
-
-       return lshs_lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr);
-}
-
 static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
                                             LLVMTypeRef type,
                                             LLVMValueRef vertex_index,
@@ -1079,7 +880,6 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
 {
        struct si_shader_context *ctx = si_shader_context_from_abi(abi);
        struct tgsi_shader_info *info = &ctx->shader->selector->info;
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
        LLVMValueRef dw_addr, stride;
        ubyte name, index;
 
@@ -1125,49 +925,12 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
                        offset *= 2;
 
                offset += component;
-               value[i + component] = lshs_lds_load(bld_base, type, offset, dw_addr);
+               value[i + component] = lshs_lds_load(ctx, type, offset, dw_addr);
        }
 
        return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
 }
 
-static LLVMValueRef fetch_output_tcs(
-               struct lp_build_tgsi_context *bld_base,
-               const struct tgsi_full_src_register *reg,
-               enum tgsi_opcode_type type, unsigned swizzle_in)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef dw_addr, stride;
-       unsigned swizzle = (swizzle_in & 0xffff);
-
-       if (reg->Register.Dimension) {
-               stride = get_tcs_out_vertex_dw_stride(ctx);
-               dw_addr = get_tcs_out_current_patch_offset(ctx);
-               dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
-       } else {
-               dw_addr = get_tcs_out_current_patch_data_offset(ctx);
-               dw_addr = get_dw_address(ctx, NULL, reg, NULL, dw_addr);
-       }
-
-       return lshs_lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr);
-}
-
-static LLVMValueRef fetch_input_tes(
-       struct lp_build_tgsi_context *bld_base,
-       const struct tgsi_full_src_register *reg,
-       enum tgsi_opcode_type type, unsigned swizzle_in)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef base, addr;
-       unsigned swizzle = (swizzle_in & 0xffff);
-
-       base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
-       addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
-
-       return buffer_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle,
-                          ctx->tess_offchip_ring, base, addr, true);
-}
-
 LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
                                   LLVMTypeRef type,
                                   LLVMValueRef vertex_index,
@@ -1226,110 +989,13 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
                }
 
                offset += component;
-               value[i + component] = buffer_load(&ctx->bld_base, type, offset,
+               value[i + component] = buffer_load(ctx, type, offset,
                                                   ctx->tess_offchip_ring, base, addr, true);
        }
 
        return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
 }
 
-static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
-                            const struct tgsi_full_instruction *inst,
-                            const struct tgsi_opcode_info *info,
-                            unsigned index,
-                            LLVMValueRef dst[4])
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       const struct tgsi_full_dst_register *reg = &inst->Dst[index];
-       const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
-       unsigned chan_index;
-       LLVMValueRef dw_addr, stride;
-       LLVMValueRef buffer, base, buf_addr;
-       LLVMValueRef values[4];
-       bool skip_lds_store;
-       bool is_tess_factor = false, is_tess_inner = false;
-
-       /* Only handle per-patch and per-vertex outputs here.
-        * Vectors will be lowered to scalars and this function will be called again.
-        */
-       if (reg->Register.File != TGSI_FILE_OUTPUT ||
-           (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
-               si_llvm_emit_store(bld_base, inst, info, index, dst);
-               return;
-       }
-
-       if (reg->Register.Dimension) {
-               stride = get_tcs_out_vertex_dw_stride(ctx);
-               dw_addr = get_tcs_out_current_patch_offset(ctx);
-               dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
-               skip_lds_store = !sh_info->reads_pervertex_outputs;
-       } else {
-               dw_addr = get_tcs_out_current_patch_data_offset(ctx);
-               dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr);
-               skip_lds_store = !sh_info->reads_perpatch_outputs;
-
-               if (!reg->Register.Indirect) {
-                       int name = sh_info->output_semantic_name[reg->Register.Index];
-
-                       /* Always write tess factors into LDS for the TCS epilog. */
-                       if (name == TGSI_SEMANTIC_TESSINNER ||
-                           name == TGSI_SEMANTIC_TESSOUTER) {
-                               /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
-                               skip_lds_store = !sh_info->reads_tessfactor_outputs &&
-                                                ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs;
-                               is_tess_factor = true;
-                               is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
-                       }
-               }
-       }
-
-       buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
-
-       base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
-       buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL);
-
-       uint32_t writemask = reg->Register.WriteMask;
-       while (writemask) {
-               chan_index = u_bit_scan(&writemask);
-               LLVMValueRef value = dst[chan_index];
-
-               if (inst->Instruction.Saturate)
-                       value = ac_build_clamp(&ctx->ac, value);
-
-               /* Skip LDS stores if there is no LDS read of this output. */
-               if (!skip_lds_store)
-                       lshs_lds_store(ctx, chan_index, dw_addr, value);
-
-               value = ac_to_integer(&ctx->ac, value);
-               values[chan_index] = value;
-
-               if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
-                       ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
-                                                   buf_addr, base,
-                                                   4 * chan_index, ac_glc);
-               }
-
-               /* Write tess factors into VGPRs for the epilog. */
-               if (is_tess_factor &&
-                   ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
-                       if (!is_tess_inner) {
-                               LLVMBuildStore(ctx->ac.builder, value, /* outer */
-                                              ctx->invoc0_tess_factors[chan_index]);
-                       } else if (chan_index < 2) {
-                               LLVMBuildStore(ctx->ac.builder, value, /* inner */
-                                              ctx->invoc0_tess_factors[4 + chan_index]);
-                       }
-               }
-       }
-
-       if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
-               LLVMValueRef value = ac_build_gather_values(&ctx->ac,
-                                                           values, 4);
-               ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
-                                           base, 0, ac_glc);
-       }
-}
-
 static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
                                    const struct nir_variable *var,
                                    LLVMValueRef vertex_index,
@@ -1452,14 +1118,13 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
        }
 }
 
-LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
-                                  unsigned input_index,
-                                  unsigned vtx_offset_param,
-                                  LLVMTypeRef type,
-                                  unsigned swizzle)
+static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
+                                         unsigned input_index,
+                                         unsigned vtx_offset_param,
+                                         LLVMTypeRef type,
+                                         unsigned swizzle)
 {
        struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
        struct si_shader *shader = ctx->shader;
        LLVMValueRef vtx_offset, soffset;
        struct tgsi_shader_info *info = &shader->selector->info;
@@ -1512,14 +1177,13 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
 
        /* GFX6: input load from the ESGS ring in memory. */
        if (swizzle == ~0) {
-               LLVMValueRef values[TGSI_NUM_CHANNELS];
+               LLVMValueRef values[4];
                unsigned chan;
-               for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+               for (chan = 0; chan < 4; chan++) {
                        values[chan] = si_llvm_load_input_gs(abi, input_index, vtx_offset_param,
                                                             type, chan);
                }
-               return ac_build_gather_values(&ctx->ac, values,
-                                             TGSI_NUM_CHANNELS);
+               return ac_build_gather_values(&ctx->ac, values, 4);
        }
 
        /* Get the vertex offset parameter on GFX6. */
@@ -1540,7 +1204,7 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
                value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
                                              ctx->i32_0, vtx_offset, soffset,
                                              0, ac_glc, true, false);
-               return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
+               return si_build_gather_64bit(ctx, type, value, value2);
        }
        return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
 }
@@ -1570,58 +1234,6 @@ static LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
        return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
 }
 
-static LLVMValueRef fetch_input_gs(
-       struct lp_build_tgsi_context *bld_base,
-       const struct tgsi_full_src_register *reg,
-       enum tgsi_opcode_type type,
-       unsigned swizzle_in)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct tgsi_shader_info *info = &ctx->shader->selector->info;
-       unsigned swizzle = swizzle_in & 0xffff;
-
-       unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
-       if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
-               return si_get_primitive_id(ctx, swizzle);
-
-       if (!reg->Register.Dimension)
-               return NULL;
-
-       return si_llvm_load_input_gs(&ctx->abi, reg->Register.Index,
-                                    reg->Dimension.Index,
-                                    tgsi2llvmtype(bld_base, type),
-                                    swizzle);
-}
-
-static int lookup_interp_param_index(unsigned interpolate, unsigned location)
-{
-       switch (interpolate) {
-       case TGSI_INTERPOLATE_CONSTANT:
-               return 0;
-
-       case TGSI_INTERPOLATE_LINEAR:
-               if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
-                       return SI_PARAM_LINEAR_SAMPLE;
-               else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
-                       return SI_PARAM_LINEAR_CENTROID;
-               else
-                       return SI_PARAM_LINEAR_CENTER;
-               break;
-       case TGSI_INTERPOLATE_COLOR:
-       case TGSI_INTERPOLATE_PERSPECTIVE:
-               if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
-                       return SI_PARAM_PERSP_SAMPLE;
-               else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
-                       return SI_PARAM_PERSP_CENTROID;
-               else
-                       return SI_PARAM_PERSP_CENTER;
-               break;
-       default:
-               fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
-               return -1;
-       }
-}
-
 static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
                                       unsigned attr_index, unsigned chan,
                                       LLVMValueRef prim_mask,
@@ -1654,9 +1266,8 @@ static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
  * @param face                 SI_PARAM_FRONT_FACE
  * @param result               the return value (4 components)
  */
-static void interp_fs_input(struct si_shader_context *ctx,
+static void interp_fs_color(struct si_shader_context *ctx,
                            unsigned input_index,
-                           unsigned semantic_name,
                            unsigned semantic_index,
                            unsigned num_interp_inputs,
                            unsigned colors_read_mask,
@@ -1693,8 +1304,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
                                                ctx->i32_1, "");
        }
 
-       if (semantic_name == TGSI_SEMANTIC_COLOR &&
-           ctx->shader->key.part.ps.prolog.color_two_side) {
+       if (ctx->shader->key.part.ps.prolog.color_two_side) {
                LLVMValueRef is_face_positive;
 
                /* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
@@ -1707,7 +1317,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
                is_face_positive = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
                                                 face, ctx->i32_0, "");
 
-               for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+               for (chan = 0; chan < 4; chan++) {
                        LLVMValueRef front, back;
 
                        front = si_build_fs_interp(ctx,
@@ -1723,14 +1333,8 @@ static void interp_fs_input(struct si_shader_context *ctx,
                                                back,
                                                "");
                }
-       } else if (semantic_name == TGSI_SEMANTIC_FOG) {
-               result[0] = si_build_fs_interp(ctx, input_index,
-                                              0, prim_mask, i, j);
-               result[1] =
-               result[2] = LLVMConstReal(ctx->f32, 0.0f);
-               result[3] = LLVMConstReal(ctx->f32, 1.0f);
        } else {
-               for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+               for (chan = 0; chan < 4; chan++) {
                        result[chan] = si_build_fs_interp(ctx,
                                                          input_index, chan,
                                                          prim_mask, i, j);
@@ -1738,60 +1342,6 @@ static void interp_fs_input(struct si_shader_context *ctx,
        }
 }
 
-void si_llvm_load_input_fs(
-       struct si_shader_context *ctx,
-       unsigned input_index,
-       LLVMValueRef out[4])
-{
-       struct si_shader *shader = ctx->shader;
-       struct tgsi_shader_info *info = &shader->selector->info;
-       LLVMValueRef main_fn = ctx->main_fn;
-       LLVMValueRef interp_param = NULL;
-       int interp_param_idx;
-       enum tgsi_semantic semantic_name = info->input_semantic_name[input_index];
-       unsigned semantic_index = info->input_semantic_index[input_index];
-       enum tgsi_interpolate_mode interp_mode = info->input_interpolate[input_index];
-       enum tgsi_interpolate_loc interp_loc = info->input_interpolate_loc[input_index];
-
-       /* Get colors from input VGPRs (set by the prolog). */
-       if (semantic_name == TGSI_SEMANTIC_COLOR) {
-               unsigned colors_read = shader->selector->info.colors_read;
-               unsigned mask = colors_read >> (semantic_index * 4);
-               unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
-                                 (semantic_index ? util_bitcount(colors_read & 0xf) : 0);
-               LLVMValueRef undef = LLVMGetUndef(ctx->f32);
-
-               out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
-               out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
-               out[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
-               out[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
-               return;
-       }
-
-       interp_param_idx = lookup_interp_param_index(interp_mode, interp_loc);
-       if (interp_param_idx == -1)
-               return;
-       else if (interp_param_idx) {
-               interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
-       }
-
-       interp_fs_input(ctx, input_index, semantic_name,
-                       semantic_index, 0, /* this param is unused */
-                       shader->selector->info.colors_read, interp_param,
-                       ac_get_arg(&ctx->ac, ctx->args.prim_mask),
-                       LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
-                       &out[0]);
-}
-
-static void declare_input_fs(
-       struct si_shader_context *ctx,
-       unsigned input_index,
-       const struct tgsi_full_declaration *decl,
-       LLVMValueRef out[4])
-{
-       si_llvm_load_input_fs(ctx, input_index, out);
-}
-
 LLVMValueRef si_get_sample_id(struct si_shader_context *ctx)
 {
        return si_unpack_param(ctx, ctx->args.ancillary, 8, 4);
@@ -1913,7 +1463,7 @@ static LLVMValueRef load_tess_level(struct si_shader_context *ctx,
        addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
                                          LLVMConstInt(ctx->i32, param, 0));
 
-       return buffer_load(&ctx->bld_base, ctx->f32,
+       return buffer_load(ctx, ctx->f32,
                           ~0, ctx->tess_offchip_ring, base, addr, true);
 
 }
@@ -1982,211 +1532,6 @@ static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
                unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
 }
 
-void si_load_system_value(struct si_shader_context *ctx,
-                         unsigned index,
-                         const struct tgsi_full_declaration *decl)
-{
-       LLVMValueRef value = 0;
-
-       assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
-
-       switch (decl->Semantic.Name) {
-       case TGSI_SEMANTIC_INSTANCEID:
-               value = ctx->abi.instance_id;
-               break;
-
-       case TGSI_SEMANTIC_VERTEXID:
-               value = LLVMBuildAdd(ctx->ac.builder,
-                                    ctx->abi.vertex_id,
-                                    ac_get_arg(&ctx->ac, ctx->args.base_vertex), "");
-               break;
-
-       case TGSI_SEMANTIC_VERTEXID_NOBASE:
-               /* Unused. Clarify the meaning in indexed vs. non-indexed
-                * draws if this is ever used again. */
-               assert(false);
-               break;
-
-       case TGSI_SEMANTIC_BASEVERTEX:
-               value = get_base_vertex(&ctx->abi);
-               break;
-
-       case TGSI_SEMANTIC_BASEINSTANCE:
-               value = ac_get_arg(&ctx->ac, ctx->args.start_instance);
-               break;
-
-       case TGSI_SEMANTIC_DRAWID:
-               value = ac_get_arg(&ctx->ac, ctx->args.draw_id);
-               break;
-
-       case TGSI_SEMANTIC_INVOCATIONID:
-               if (ctx->type == PIPE_SHADER_TESS_CTRL) {
-                       value = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
-               } else if (ctx->type == PIPE_SHADER_GEOMETRY) {
-                       if (ctx->screen->info.chip_class >= GFX10) {
-                               value = LLVMBuildAnd(ctx->ac.builder,
-                                                    ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
-                                                    LLVMConstInt(ctx->i32, 127, 0), "");
-                       } else {
-                               value = ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id);
-                       }
-               } else {
-                       assert(!"INVOCATIONID not implemented");
-               }
-               break;
-
-       case TGSI_SEMANTIC_POSITION:
-       {
-               LLVMValueRef pos[4] = {
-                       LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
-                       LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
-                       LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT),
-                       ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
-                                     LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)),
-               };
-               value = ac_build_gather_values(&ctx->ac, pos, 4);
-               break;
-       }
-
-       case TGSI_SEMANTIC_FACE:
-               value = ac_get_arg(&ctx->ac, ctx->args.front_face);
-               break;
-
-       case TGSI_SEMANTIC_SAMPLEID:
-               value = si_get_sample_id(ctx);
-               break;
-
-       case TGSI_SEMANTIC_SAMPLEPOS: {
-               LLVMValueRef pos[4] = {
-                       LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
-                       LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
-                       LLVMConstReal(ctx->f32, 0),
-                       LLVMConstReal(ctx->f32, 0)
-               };
-               pos[0] = ac_build_fract(&ctx->ac, pos[0], 32);
-               pos[1] = ac_build_fract(&ctx->ac, pos[1], 32);
-               value = ac_build_gather_values(&ctx->ac, pos, 4);
-               break;
-       }
-
-       case TGSI_SEMANTIC_SAMPLEMASK:
-               /* This can only occur with the OpenGL Core profile, which
-                * doesn't support smoothing.
-                */
-               value = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE);
-               break;
-
-       case TGSI_SEMANTIC_TESSCOORD:
-               value = si_load_tess_coord(&ctx->abi);
-               break;
-
-       case TGSI_SEMANTIC_VERTICESIN:
-               value = si_load_patch_vertices_in(&ctx->abi);
-               break;
-
-       case TGSI_SEMANTIC_TESSINNER:
-       case TGSI_SEMANTIC_TESSOUTER:
-               value = load_tess_level(ctx, decl->Semantic.Name);
-               break;
-
-       case TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL:
-       case TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL:
-               value = load_tess_level_default(ctx, decl->Semantic.Name);
-               break;
-
-       case TGSI_SEMANTIC_PRIMID:
-               value = si_get_primitive_id(ctx, 0);
-               break;
-
-       case TGSI_SEMANTIC_GRID_SIZE:
-               value = ac_get_arg(&ctx->ac, ctx->args.num_work_groups);
-               break;
-
-       case TGSI_SEMANTIC_BLOCK_SIZE:
-               value = get_block_size(&ctx->abi);
-               break;
-
-       case TGSI_SEMANTIC_BLOCK_ID:
-       {
-               LLVMValueRef values[3];
-
-               for (int i = 0; i < 3; i++) {
-                       values[i] = ctx->i32_0;
-                       if (ctx->args.workgroup_ids[i].used) {
-                               values[i] = ac_get_arg(&ctx->ac, ctx->args.workgroup_ids[i]);
-                       }
-               }
-               value = ac_build_gather_values(&ctx->ac, values, 3);
-               break;
-       }
-
-       case TGSI_SEMANTIC_THREAD_ID:
-               value = ac_get_arg(&ctx->ac, ctx->args.local_invocation_ids);
-               break;
-
-       case TGSI_SEMANTIC_HELPER_INVOCATION:
-               value = ac_build_load_helper_invocation(&ctx->ac);
-               break;
-
-       case TGSI_SEMANTIC_SUBGROUP_SIZE:
-               value = LLVMConstInt(ctx->i32, ctx->ac.wave_size, 0);
-               break;
-
-       case TGSI_SEMANTIC_SUBGROUP_INVOCATION:
-               value = ac_get_thread_id(&ctx->ac);
-               break;
-
-       case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
-       {
-               LLVMValueRef id = ac_get_thread_id(&ctx->ac);
-               if (ctx->ac.wave_size == 64)
-                       id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
-               value = LLVMBuildShl(ctx->ac.builder,
-                                    LLVMConstInt(ctx->ac.iN_wavemask, 1, 0), id, "");
-               if (ctx->ac.wave_size == 32)
-                       value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
-               value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
-               break;
-       }
-
-       case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
-       case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
-       case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
-       case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
-       {
-               LLVMValueRef id = ac_get_thread_id(&ctx->ac);
-               if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK ||
-                   decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) {
-                       /* All bits set except LSB */
-                       value = LLVMConstInt(ctx->ac.iN_wavemask, -2, 0);
-               } else {
-                       /* All bits set */
-                       value = LLVMConstInt(ctx->ac.iN_wavemask, -1, 0);
-               }
-               if (ctx->ac.wave_size == 64)
-                       id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
-               value = LLVMBuildShl(ctx->ac.builder, value, id, "");
-               if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
-                   decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
-                       value = LLVMBuildNot(ctx->ac.builder, value, "");
-               if (ctx->ac.wave_size == 32)
-                       value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
-               value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
-               break;
-       }
-
-       case TGSI_SEMANTIC_CS_USER_DATA_AMD:
-               value = ac_get_arg(&ctx->ac, ctx->cs_user_data);
-               break;
-
-       default:
-               assert(!"unknown system value");
-               return;
-       }
-
-       ctx->system_values[index] = value;
-}
-
 void si_declare_compute_memory(struct si_shader_context *ctx)
 {
        struct si_shader_selector *sel = ctx->shader->selector;
@@ -2206,15 +1551,6 @@ void si_declare_compute_memory(struct si_shader_context *ctx)
        ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
 }
 
-void si_tgsi_declare_compute_memory(struct si_shader_context *ctx,
-                                   const struct tgsi_full_declaration *decl)
-{
-       assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
-       assert(decl->Range.First == decl->Range.Last);
-
-       si_declare_compute_memory(ctx);
-}
-
 static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
 {
        LLVMValueRef ptr =
@@ -2256,15 +1592,6 @@ static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *c
        return ac_build_gather_values(&ctx->ac, desc_elems, 4);
 }
 
-static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
-{
-       LLVMValueRef list_ptr = ac_get_arg(&ctx->ac,
-                                          ctx->const_and_shader_buffers);
-
-       return ac_build_load_to_sgpr(&ctx->ac, list_ptr,
-                                    LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
-}
-
 static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
 {
        struct si_shader_context *ctx = si_shader_context_from_abi(abi);
@@ -2299,72 +1626,6 @@ load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
        return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
 }
 
-static LLVMValueRef fetch_constant(
-       struct lp_build_tgsi_context *bld_base,
-       const struct tgsi_full_src_register *reg,
-       enum tgsi_opcode_type type,
-       unsigned swizzle_in)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct si_shader_selector *sel = ctx->shader->selector;
-       const struct tgsi_ind_register *ireg = &reg->Indirect;
-       unsigned buf, idx;
-       unsigned swizzle = swizzle_in & 0xffff;
-
-       LLVMValueRef addr, bufp;
-
-       if (swizzle_in == LP_CHAN_ALL) {
-               unsigned chan;
-               LLVMValueRef values[4];
-               for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
-                       values[chan] = fetch_constant(bld_base, reg, type, chan);
-
-               return ac_build_gather_values(&ctx->ac, values, 4);
-       }
-
-       /* Split 64-bit loads. */
-       if (tgsi_type_is_64bit(type)) {
-               LLVMValueRef lo, hi;
-
-               lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle);
-               hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, (swizzle_in >> 16));
-               return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-                                               lo, hi);
-       }
-
-       idx = reg->Register.Index * 4 + swizzle;
-       if (reg->Register.Indirect) {
-               addr = si_get_indirect_index(ctx, ireg, 16, idx * 4);
-       } else {
-               addr = LLVMConstInt(ctx->i32, idx * 4, 0);
-       }
-
-       /* Fast path when user data SGPRs point to constant buffer 0 directly. */
-       if (sel->info.const_buffers_declared == 1 &&
-           sel->info.shader_buffers_declared == 0) {
-               LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx);
-               LLVMValueRef result = buffer_load_const(ctx, desc, addr);
-               return bitcast(bld_base, type, result);
-       }
-
-       assert(reg->Register.Dimension);
-       buf = reg->Dimension.Index;
-
-       if (reg->Dimension.Indirect) {
-               LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
-               LLVMValueRef index;
-               index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
-                                                     reg->Dimension.Index,
-                                                     ctx->num_const_buffers);
-               index = LLVMBuildAdd(ctx->ac.builder, index,
-                                    LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
-               bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
-       } else
-               bufp = load_const_buffer_desc(ctx, buf);
-
-       return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr));
-}
-
 /* Initialize arguments for the shader export intrinsic */
 static void si_llvm_init_export_args(struct si_shader_context *ctx,
                                     LLVMValueRef *values,
@@ -2495,11 +1756,8 @@ static void si_llvm_init_export_args(struct si_shader_context *ctx,
        }
 }
 
-static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
-                         LLVMValueRef alpha)
+static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
        if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
                static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
                        [PIPE_FUNC_LESS] = LLVMRealOLT,
@@ -2522,11 +1780,10 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
        }
 }
 
-static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
+static LLVMValueRef si_scale_alpha_by_sample_mask(struct si_shader_context *ctx,
                                                  LLVMValueRef alpha,
                                                  unsigned samplemask_param)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMValueRef coverage;
 
        /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
@@ -2569,8 +1826,8 @@ static void si_llvm_emit_clipvertex(struct si_shader_context *ctx,
                args->out[3] = LLVMConstReal(ctx->f32, 0.0f);
 
                /* Compute dot products of position and user clip plane vectors */
-               for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-                       for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
+               for (chan = 0; chan < 4; chan++) {
+                       for (const_chan = 0; const_chan < 4; const_chan++) {
                                LLVMValueRef addr =
                                        LLVMConstInt(ctx->i32, ((reg_index * 4 + chan) * 4 +
                                                                const_chan) * 4, 0);
@@ -3030,9 +2287,8 @@ void si_llvm_export_vs(struct si_shader_context *ctx,
  * Forward all outputs from the vertex shader to the TES. This is only used
  * for the fixed function TCS.
  */
-static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
+static void si_copy_tcs_inputs(struct si_shader_context *ctx)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMValueRef invocation_id, buffer, buffer_offset;
        LLVMValueRef lds_vertex_stride, lds_base;
        uint64_t inputs;
@@ -3059,21 +2315,20 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
                                              invocation_id,
                                              LLVMConstInt(ctx->i32, i, 0));
 
-               LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr);
+               LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
 
                ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
                                            buffer_offset, 0, ac_glc);
        }
 }
 
-static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
+static void si_write_tess_factors(struct si_shader_context *ctx,
                                  LLVMValueRef rel_patch_id,
                                  LLVMValueRef invocation_id,
                                  LLVMValueRef tcs_out_current_patch_data_offset,
                                  LLVMValueRef invoc0_tf_outer[4],
                                  LLVMValueRef invoc0_tf_inner[2])
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
        unsigned tess_inner_index, tess_outer_index;
        LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
@@ -3082,7 +2337,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 
        /* Add a barrier before loading tess factors from LDS. */
        if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
-               si_llvm_emit_barrier(NULL, bld_base, NULL);
+               si_llvm_emit_barrier(ctx);
 
        /* Do this only for invocation 0, because the tess levels are per-patch,
         * not per-vertex.
@@ -3144,11 +2399,11 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 
                for (i = 0; i < outer_comps; i++) {
                        outer[i] = out[i] =
-                               lshs_lds_load(bld_base, ctx->ac.i32, i, lds_outer);
+                               lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
                }
                for (i = 0; i < inner_comps; i++) {
                        inner[i] = out[outer_comps+i] =
-                               lshs_lds_load(bld_base, ctx->ac.i32, i, lds_inner);
+                               lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
                }
        }
 
@@ -3279,11 +2534,10 @@ static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi,
                                      LLVMValueRef *addrs)
 {
        struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
        LLVMBuilderRef builder = ctx->ac.builder;
        LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
 
-       si_copy_tcs_inputs(bld_base);
+       si_copy_tcs_inputs(ctx);
 
        rel_patch_id = get_rel_patch_id(ctx);
        invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
@@ -3595,12 +2849,6 @@ static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi,
        emit_gs_epilogue(ctx);
 }
 
-static void si_tgsi_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_gs_epilogue(ctx);
-}
-
 static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
                                     unsigned max_outputs,
                                     LLVMValueRef *addrs)
@@ -3677,24 +2925,15 @@ static void si_llvm_emit_prim_discard_cs_epilogue(struct ac_shader_abi *abi,
        ctx->return_value = ret;
 }
 
-static void si_tgsi_emit_epilogue(struct lp_build_tgsi_context *bld_base)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       ctx->abi.emit_outputs(&ctx->abi, RADEON_LLVM_MAX_OUTPUTS,
-                             &ctx->outputs[0][0]);
-}
-
 struct si_ps_exports {
        unsigned num;
        struct ac_export_args args[10];
 };
 
-static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
+static void si_export_mrt_z(struct si_shader_context *ctx,
                            LLVMValueRef depth, LLVMValueRef stencil,
                            LLVMValueRef samplemask, struct si_ps_exports *exp)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
        struct ac_export_args args;
 
        ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
@@ -3702,12 +2941,11 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
        memcpy(&exp->args[exp->num++], &args, sizeof(args));
 }
 
-static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
+static void si_export_mrt_color(struct si_shader_context *ctx,
                                LLVMValueRef *color, unsigned index,
                                unsigned samplemask_param,
                                bool is_last, struct si_ps_exports *exp)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
        int i;
 
        /* Clamp color */
@@ -3722,11 +2960,11 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
        /* Alpha test */
        if (index == 0 &&
            ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
-               si_alpha_test(bld_base, color[3]);
+               si_alpha_test(ctx, color[3]);
 
        /* Line & polygon smoothing */
        if (ctx->shader->key.part.ps.epilog.poly_line_smoothing)
-               color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3],
+               color[3] = si_scale_alpha_by_sample_mask(ctx, color[3],
                                                         samplemask_param);
 
        /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
@@ -3873,345 +3111,6 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi,
        ctx->return_value = ret;
 }
 
-static void membar_emit(
-               const struct lp_build_tgsi_action *action,
-               struct lp_build_tgsi_context *bld_base,
-               struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
-       unsigned flags = LLVMConstIntGetZExtValue(src0);
-       unsigned wait_flags = 0;
-
-       if (flags & TGSI_MEMBAR_THREAD_GROUP)
-               wait_flags |= AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
-
-       if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER |
-                    TGSI_MEMBAR_SHADER_BUFFER |
-                    TGSI_MEMBAR_SHADER_IMAGE))
-               wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
-
-       if (flags & TGSI_MEMBAR_SHARED)
-               wait_flags |= AC_WAIT_LGKM;
-
-       ac_build_waitcnt(&ctx->ac, wait_flags);
-}
-
-static void clock_emit(
-               const struct lp_build_tgsi_action *action,
-               struct lp_build_tgsi_context *bld_base,
-               struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef tmp = ac_build_shader_clock(&ctx->ac);
-
-       emit_data->output[0] =
-               LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_0, "");
-       emit_data->output[1] =
-               LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_1, "");
-}
-
-static void si_llvm_emit_ddxy(
-       const struct lp_build_tgsi_action *action,
-       struct lp_build_tgsi_context *bld_base,
-       struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       unsigned opcode = emit_data->info->opcode;
-       LLVMValueRef val;
-       int idx;
-       unsigned mask;
-
-       if (opcode == TGSI_OPCODE_DDX_FINE)
-               mask = AC_TID_MASK_LEFT;
-       else if (opcode == TGSI_OPCODE_DDY_FINE)
-               mask = AC_TID_MASK_TOP;
-       else
-               mask = AC_TID_MASK_TOP_LEFT;
-
-       /* for DDX we want to next X pixel, DDY next Y pixel. */
-       idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
-
-       val = ac_to_integer(&ctx->ac, emit_data->args[0]);
-       val = ac_build_ddxy(&ctx->ac, mask, idx, val);
-       emit_data->output[emit_data->chan] = val;
-}
-
-static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
-                               struct lp_build_tgsi_context *bld_base,
-                               struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct si_shader *shader = ctx->shader;
-       const struct tgsi_shader_info *info = &shader->selector->info;
-       LLVMValueRef interp_param;
-       const struct tgsi_full_instruction *inst = emit_data->inst;
-       const struct tgsi_full_src_register *input = &inst->Src[0];
-       int input_base, input_array_size;
-       int chan;
-       int i;
-       LLVMValueRef prim_mask = ac_get_arg(&ctx->ac, ctx->args.prim_mask);
-       LLVMValueRef array_idx, offset_x = NULL, offset_y = NULL;
-       int interp_param_idx;
-       unsigned interp;
-       unsigned location;
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
-               /* offset is in second src, first two channels */
-               offset_x = lp_build_emit_fetch(bld_base, emit_data->inst, 1,
-                                              TGSI_CHAN_X);
-               offset_y = lp_build_emit_fetch(bld_base, emit_data->inst, 1,
-                                              TGSI_CHAN_Y);
-       } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
-               LLVMValueRef sample_position;
-               LLVMValueRef sample_id;
-               LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
-
-               /* fetch sample ID, then fetch its sample position,
-                * and place into first two channels.
-                */
-               sample_id = lp_build_emit_fetch(bld_base,
-                                               emit_data->inst, 1, TGSI_CHAN_X);
-               sample_id = ac_to_integer(&ctx->ac, sample_id);
-
-               /* Section 8.13.2 (Interpolation Functions) of the OpenGL Shading
-                * Language 4.50 spec says about interpolateAtSample:
-                *
-                *    "Returns the value of the input interpolant variable at
-                *     the location of sample number sample. If multisample
-                *     buffers are not available, the input variable will be
-                *     evaluated at the center of the pixel. If sample sample
-                *     does not exist, the position used to interpolate the
-                *     input variable is undefined."
-                *
-                * This means that sample_id values outside of the valid are
-                * in fact valid input, and the usual mechanism for loading the
-                * sample position doesn't work.
-                */
-               if (ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center) {
-                       LLVMValueRef center[4] = {
-                               LLVMConstReal(ctx->f32, 0.5),
-                               LLVMConstReal(ctx->f32, 0.5),
-                               ctx->ac.f32_0,
-                               ctx->ac.f32_0,
-                       };
-
-                       sample_position = ac_build_gather_values(&ctx->ac, center, 4);
-               } else {
-                       sample_position = load_sample_position(&ctx->abi, sample_id);
-               }
-
-               offset_x = LLVMBuildExtractElement(ctx->ac.builder, sample_position,
-                                                  ctx->i32_0, "");
-
-               offset_x = LLVMBuildFSub(ctx->ac.builder, offset_x, halfval, "");
-               offset_y = LLVMBuildExtractElement(ctx->ac.builder, sample_position,
-                                                  ctx->i32_1, "");
-               offset_y = LLVMBuildFSub(ctx->ac.builder, offset_y, halfval, "");
-       }
-
-       assert(input->Register.File == TGSI_FILE_INPUT);
-
-       if (input->Register.Indirect) {
-               unsigned array_id = input->Indirect.ArrayID;
-
-               if (array_id) {
-                       input_base = info->input_array_first[array_id];
-                       input_array_size = info->input_array_last[array_id] - input_base + 1;
-               } else {
-                       input_base = inst->Src[0].Register.Index;
-                       input_array_size = info->num_inputs - input_base;
-               }
-
-               array_idx = si_get_indirect_index(ctx, &input->Indirect,
-                                                 1, input->Register.Index - input_base);
-       } else {
-               input_base = inst->Src[0].Register.Index;
-               input_array_size = 1;
-               array_idx = ctx->i32_0;
-       }
-
-       interp = shader->selector->info.input_interpolate[input_base];
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
-           inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
-               location = TGSI_INTERPOLATE_LOC_CENTER;
-       else
-               location = TGSI_INTERPOLATE_LOC_CENTROID;
-
-       interp_param_idx = lookup_interp_param_index(interp, location);
-       if (interp_param_idx == -1)
-               return;
-       else if (interp_param_idx)
-               interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
-       else
-               interp_param = NULL;
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
-           inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
-               LLVMValueRef ij_out[2];
-               LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
-
-               /*
-                * take the I then J parameters, and the DDX/Y for it, and
-                * calculate the IJ inputs for the interpolator.
-                * temp1 = ddx * offset/sample.x + I;
-                * interp_param.I = ddy * offset/sample.y + temp1;
-                * temp1 = ddx * offset/sample.x + J;
-                * interp_param.J = ddy * offset/sample.y + temp1;
-                */
-               for (i = 0; i < 2; i++) {
-                       LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, 0);
-                       LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, 0);
-                       LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
-                                                                     ddxy_out, ix_ll, "");
-                       LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
-                                                                     ddxy_out, iy_ll, "");
-                       LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
-                                                                        interp_param, ix_ll, "");
-                       LLVMValueRef temp;
-
-                       interp_el = ac_to_float(&ctx->ac, interp_el);
-
-                       temp = ac_build_fmad(&ctx->ac, ddx_el, offset_x, interp_el);
-                       ij_out[i] = ac_build_fmad(&ctx->ac, ddy_el, offset_y, temp);
-               }
-               interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
-       }
-
-       if (interp_param)
-               interp_param = ac_to_float(&ctx->ac, interp_param);
-
-       for (chan = 0; chan < 4; chan++) {
-               LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->f32, input_array_size));
-               unsigned schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
-
-               for (unsigned idx = 0; idx < input_array_size; ++idx) {
-                       LLVMValueRef v, i = NULL, j = NULL;
-
-                       if (interp_param) {
-                               i = LLVMBuildExtractElement(
-                                       ctx->ac.builder, interp_param, ctx->i32_0, "");
-                               j = LLVMBuildExtractElement(
-                                       ctx->ac.builder, interp_param, ctx->i32_1, "");
-                       }
-                       v = si_build_fs_interp(ctx, input_base + idx, schan,
-                                              prim_mask, i, j);
-
-                       gather = LLVMBuildInsertElement(ctx->ac.builder,
-                               gather, v, LLVMConstInt(ctx->i32, idx, false), "");
-               }
-
-               emit_data->output[chan] = LLVMBuildExtractElement(
-                       ctx->ac.builder, gather, array_idx, "");
-       }
-}
-
-static void vote_all_emit(
-       const struct lp_build_tgsi_action *action,
-       struct lp_build_tgsi_context *bld_base,
-       struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-        LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, emit_data->args[0]);
-       emit_data->output[emit_data->chan] =
-               LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
-}
-
-static void vote_any_emit(
-       const struct lp_build_tgsi_action *action,
-       struct lp_build_tgsi_context *bld_base,
-       struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-        LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, emit_data->args[0]);
-       emit_data->output[emit_data->chan] =
-               LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
-}
-
-static void vote_eq_emit(
-       const struct lp_build_tgsi_action *action,
-       struct lp_build_tgsi_context *bld_base,
-       struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-        LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, emit_data->args[0]);
-       emit_data->output[emit_data->chan] =
-               LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
-}
-
-static void ballot_emit(
-       const struct lp_build_tgsi_action *action,
-       struct lp_build_tgsi_context *bld_base,
-       struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef tmp;
-
-       tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
-       tmp = ac_build_ballot(&ctx->ac, tmp);
-
-       emit_data->output[0] = LLVMBuildTrunc(builder, tmp, ctx->i32, "");
-
-       if (ctx->ac.wave_size == 32) {
-               emit_data->output[1] = ctx->i32_0;
-       } else {
-               tmp = LLVMBuildLShr(builder, tmp, LLVMConstInt(ctx->i64, 32, 0), "");
-               emit_data->output[1] = LLVMBuildTrunc(builder, tmp, ctx->i32, "");
-       }
-}
-
-static void read_lane_emit(
-       const struct lp_build_tgsi_action *action,
-       struct lp_build_tgsi_context *bld_base,
-       struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_READ_INVOC) {
-               emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
-                                                        0, emit_data->src_chan);
-
-               /* Always read the source invocation (= lane) from the X channel. */
-               emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
-                                                        1, TGSI_CHAN_X);
-               emit_data->arg_count = 2;
-       }
-
-       /* We currently have no other way to prevent LLVM from lifting the icmp
-        * calls to a dominating basic block.
-        */
-       ac_build_optimization_barrier(&ctx->ac, &emit_data->args[0]);
-
-       for (unsigned i = 0; i < emit_data->arg_count; ++i)
-               emit_data->args[i] = ac_to_integer(&ctx->ac, emit_data->args[i]);
-
-       emit_data->output[emit_data->chan] =
-               ac_build_intrinsic(&ctx->ac, action->intr_name,
-                                  ctx->i32, emit_data->args, emit_data->arg_count,
-                                  AC_FUNC_ATTR_READNONE |
-                                  AC_FUNC_ATTR_CONVERGENT);
-}
-
-static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
-                                      struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
-       LLVMValueRef imm;
-       unsigned stream;
-
-       assert(src0.File == TGSI_FILE_IMMEDIATE);
-
-       imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX];
-       stream = LLVMConstIntGetZExtValue(imm) & 0x3;
-       return stream;
-}
-
 /* Emit one vertex from the geometry shader */
 static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
                                unsigned stream,
@@ -4296,18 +3195,6 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
                ac_build_endif(&ctx->ac, 6505);
 }
 
-/* Emit one vertex from the geometry shader */
-static void si_tgsi_emit_vertex(
-       const struct lp_build_tgsi_action *action,
-       struct lp_build_tgsi_context *bld_base,
-       struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       unsigned stream = si_llvm_get_stream(bld_base, emit_data);
-
-       si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]);
-}
-
 /* Cut one primitive from the geometry shader */
 static void si_llvm_emit_primitive(struct ac_shader_abi *abi,
                                   unsigned stream)
@@ -4324,23 +3211,8 @@ static void si_llvm_emit_primitive(struct ac_shader_abi *abi,
                         si_get_gs_wave_id(ctx));
 }
 
-/* Cut one primitive from the geometry shader */
-static void si_tgsi_emit_primitive(
-       const struct lp_build_tgsi_action *action,
-       struct lp_build_tgsi_context *bld_base,
-       struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       si_llvm_emit_primitive(&ctx->abi, si_llvm_get_stream(bld_base, emit_data));
-}
-
-static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
-                                struct lp_build_tgsi_context *bld_base,
-                                struct lp_build_emit_data *emit_data)
+static void si_llvm_emit_barrier(struct si_shader_context *ctx)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
        /* GFX6 only (thanks to a hw bug workaround):
         * The real barrier instruction isn’t needed, because an entire patch
         * always fits into a single wave.
@@ -5654,9 +4526,9 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
        shader->selector = gs_selector;
        shader->is_gs_copy_shader = true;
 
-       si_init_shader_ctx(&ctx, sscreen, compiler,
-                          si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false),
-                          false);
+       si_llvm_context_init(&ctx, sscreen, compiler,
+                            si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false),
+                            64);
        ctx.shader = shader;
        ctx.type = PIPE_SHADER_VERTEX;
 
@@ -5917,47 +4789,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
        }
 }
 
-static void si_init_shader_ctx(struct si_shader_context *ctx,
-                              struct si_screen *sscreen,
-                              struct ac_llvm_compiler *compiler,
-                              unsigned wave_size,
-                              bool nir)
-{
-       struct lp_build_tgsi_context *bld_base;
-
-       si_llvm_context_init(ctx, sscreen, compiler, wave_size,
-                            nir ? 64 : wave_size);
-
-       bld_base = &ctx->bld_base;
-       bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
-
-       bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID].emit = build_interp_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE].emit = build_interp_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET].emit = build_interp_intrinsic;
-
-       bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
-
-       bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit;
-
-       bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
-       bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
-       bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
-       bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
-
-       bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
-       bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
-       bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
-       bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
-       bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = "llvm.amdgcn.readfirstlane";
-       bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit;
-       bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = "llvm.amdgcn.readlane";
-       bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
-
-       bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex;
-       bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_tgsi_emit_primitive;
-       bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
-}
-
 static void si_optimize_vs_outputs(struct si_shader_context *ctx)
 {
        struct si_shader *shader = ctx->shader;
@@ -6014,17 +4845,34 @@ LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
                             si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), "");
 }
 
+static void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
+{
+       struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+       LLVMBuilderRef builder = ctx->ac.builder;
+       /* Emit a kill driven by "visible"; the selector flag picks the variant. */
+       if (ctx->shader->selector->force_correct_derivs_after_kill) {
+               /* Kill immediately while maintaining WQM. */
+               ac_build_kill_if_false(&ctx->ac,
+                                      ac_build_wqm_vote(&ctx->ac, visible));
+               /* Fold "visible" into the mask kept in ctx->postponed_kill. */
+               LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
+               mask = LLVMBuildAnd(builder, mask, visible, "");
+               LLVMBuildStore(builder, mask, ctx->postponed_kill);
+               return;
+       }
+       /* Otherwise kill on "visible" directly; postponed_kill is not touched. */
+       ac_build_kill_if_false(&ctx->ac, visible);
+}
+
 static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                                 struct nir_shader *nir, bool free_nir)
 {
        struct si_shader *shader = ctx->shader;
        struct si_shader_selector *sel = shader->selector;
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
 
        // TODO clean all this up!
        switch (ctx->type) {
        case PIPE_SHADER_VERTEX:
-               ctx->load_input = declare_input_vs;
                if (shader->key.as_ls)
                        ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue;
                else if (shader->key.as_es)
@@ -6035,22 +4883,16 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                        ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
                else
                        ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
-               bld_base->emit_epilogue = si_tgsi_emit_epilogue;
                ctx->abi.load_base_vertex = get_base_vertex;
                break;
        case PIPE_SHADER_TESS_CTRL:
-               bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
                ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
                ctx->abi.load_tess_level = si_load_tess_level;
-               bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
-               bld_base->emit_store = store_output_tcs;
                ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
                ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
                ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
-               bld_base->emit_epilogue = si_tgsi_emit_epilogue;
                break;
        case PIPE_SHADER_TESS_EVAL:
-               bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
                ctx->abi.load_tess_varyings = si_nir_load_input_tes;
                ctx->abi.load_tess_coord = si_load_tess_coord;
                ctx->abi.load_tess_level = si_load_tess_level;
@@ -6061,20 +4903,15 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                        ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
                else
                        ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
-               bld_base->emit_epilogue = si_tgsi_emit_epilogue;
                break;
        case PIPE_SHADER_GEOMETRY:
-               bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
                ctx->abi.load_inputs = si_nir_load_input_gs;
                ctx->abi.emit_vertex = si_llvm_emit_vertex;
                ctx->abi.emit_primitive = si_llvm_emit_primitive;
                ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
-               bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
                break;
        case PIPE_SHADER_FRAGMENT:
-               ctx->load_input = declare_input_fs;
                ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
-               bld_base->emit_epilogue = si_tgsi_emit_epilogue;
                ctx->abi.load_sample_position = load_sample_position;
                ctx->abi.load_sample_mask_in = load_sample_mask_in;
                ctx->abi.emit_fbfetch = si_nir_emit_fbfetch;
@@ -6229,7 +5066,7 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                                 * and contains a barrier, it will wait there and then
                                 * reach s_endpgm.
                                 */
-                               si_llvm_emit_barrier(NULL, bld_base, NULL);
+                               si_llvm_emit_barrier(ctx);
                        }
                }
        }
@@ -6241,19 +5078,12 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                               ctx->postponed_kill);
        }
 
-       if (sel->tokens) {
-               if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
-                       fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
-                       return false;
-               }
-       } else {
-               bool success = si_nir_build_llvm(ctx, nir);
-               if (free_nir)
-                       ralloc_free(nir);
-               if (!success) {
-                       fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
-                       return false;
-               }
+       bool success = si_nir_build_llvm(ctx, nir);
+       if (free_nir)
+               ralloc_free(nir);
+       if (!success) {
+               fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
+               return false;
        }
 
        si_llvm_build_ret(ctx, ctx->return_value);
@@ -6899,10 +5729,10 @@ static struct nir_shader *get_nir_shader(struct si_shader_selector *sel,
        return NULL;
 }
 
-int si_compile_tgsi_shader(struct si_screen *sscreen,
-                          struct ac_llvm_compiler *compiler,
-                          struct si_shader *shader,
-                          struct pipe_debug_callback *debug)
+int si_compile_shader(struct si_screen *sscreen,
+                     struct ac_llvm_compiler *compiler,
+                     struct si_shader *shader,
+                     struct pipe_debug_callback *debug)
 {
        struct si_shader_selector *sel = shader->selector;
        struct si_shader_context ctx;
@@ -6914,16 +5744,12 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
         * conversion fails. */
        if (si_can_dump_shader(sscreen, sel->type) &&
            !(sscreen->debug_flags & DBG(NO_TGSI))) {
-               if (sel->tokens)
-                       tgsi_dump(sel->tokens, 0);
-               else
-                       nir_print_shader(nir, stderr);
+               nir_print_shader(nir, stderr);
                si_dump_streamout(&sel->so);
        }
 
-       si_init_shader_ctx(&ctx, sscreen, compiler, si_get_shader_wave_size(shader),
-                          nir != NULL);
-       si_llvm_context_set_ir(&ctx, shader, nir);
+       si_llvm_context_init(&ctx, sscreen, compiler, si_get_shader_wave_size(shader), 64);
+       si_llvm_context_set_ir(&ctx, shader);
 
        memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
               sizeof(shader->info.vs_output_param_offset));
@@ -6982,7 +5808,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                        shader_ls.key.mono = shader->key.mono;
                        shader_ls.key.opt = shader->key.opt;
                        shader_ls.is_monolithic = true;
-                       si_llvm_context_set_ir(&ctx, &shader_ls, nir);
+                       si_llvm_context_set_ir(&ctx, &shader_ls);
 
                        if (!si_compile_tgsi_main(&ctx, nir, free_nir)) {
                                si_llvm_dispose(&ctx);
@@ -7050,7 +5876,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                        shader_es.key.mono = shader->key.mono;
                        shader_es.key.opt = shader->key.opt;
                        shader_es.is_monolithic = true;
-                       si_llvm_context_set_ir(&ctx, &shader_es, nir);
+                       si_llvm_context_set_ir(&ctx, &shader_es);
 
                        if (!si_compile_tgsi_main(&ctx, nir, free_nir)) {
                                si_llvm_dispose(&ctx);
@@ -7269,10 +6095,10 @@ si_get_shader_part(struct si_screen *sscreen,
        }
 
        struct si_shader_context ctx;
-       si_init_shader_ctx(&ctx, sscreen, compiler,
-                          si_get_wave_size(sscreen, type, shader.key.as_ngg,
-                                           shader.key.as_es),
-                          false);
+       si_llvm_context_init(&ctx, sscreen, compiler,
+                            si_get_wave_size(sscreen, type, shader.key.as_ngg,
+                                             shader.key.as_es),
+                            64);
        ctx.shader = &shader;
        ctx.type = type;
 
@@ -7540,8 +6366,6 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen,
 static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
                                         union si_shader_part_key *key)
 {
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
        memset(&ctx->args, 0, sizeof(ctx->args));
 
        if (ctx->screen->info.chip_class >= GFX9) {
@@ -7608,7 +6432,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
        for (unsigned i = 0; i < 6; i++)
                invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
 
-       si_write_tess_factors(bld_base,
+       si_write_tess_factors(ctx,
                              ac_get_arg(&ctx->ac, rel_patch_id),
                              ac_get_arg(&ctx->ac, invocation_id),
                              ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
@@ -7914,9 +6738,8 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
                        face = ac_to_integer(&ctx->ac, face);
                }
 
-               interp_fs_input(ctx,
-                               key->ps_prolog.color_attr_index[i],
-                               TGSI_SEMANTIC_COLOR, i,
+               interp_fs_color(ctx,
+                               key->ps_prolog.color_attr_index[i], i,
                                key->ps_prolog.num_interp_inputs,
                                key->ps_prolog.colors_read, interp_ij,
                                prim_mask, face, color);
@@ -7990,7 +6813,6 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 static void si_build_ps_epilog_function(struct si_shader_context *ctx,
                                        union si_shader_part_key *key)
 {
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
        LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
        int i;
        struct si_ps_exports exp = {};
@@ -8060,7 +6882,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
                for (i = 0; i < 4; i++)
                        color[i] = LLVMGetParam(ctx->main_fn, vgpr++);
 
-               si_export_mrt_color(bld_base, color, mrt,
+               si_export_mrt_color(ctx, color, mrt,
                                    ctx->args.arg_count - 1,
                                    mrt == last_color_export, &exp);
        }
@@ -8074,7 +6896,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
                samplemask = LLVMGetParam(ctx->main_fn, vgpr++);
 
        if (depth || stencil || samplemask)
-               si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);
+               si_export_mrt_z(ctx, depth, stencil, samplemask, &exp);
        else if (last_color_export == -1)
                ac_build_export_null(&ctx->ac);
 
@@ -8240,7 +7062,7 @@ bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compil
                /* Monolithic shader (compiled as a whole, has many variants,
                 * may take a long time to compile).
                 */
-               r = si_compile_tgsi_shader(sscreen, compiler, shader, debug);
+               r = si_compile_shader(sscreen, compiler, shader, debug);
                if (r)
                        return false;
        } else {
index d9a199bfa3c3a70a28dcf61e6f4f0d140abdbead..30dbe1c6a6e0243a39cc70cafd0804811e2d0e65 100644 (file)
@@ -326,7 +326,6 @@ struct si_shader_selector {
 
        struct si_shader        *gs_copy_shader;
 
-       struct tgsi_token       *tokens;
        struct nir_shader       *nir;
        void                    *nir_binary;
        unsigned                nir_size;
@@ -730,10 +729,10 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                           struct ac_llvm_compiler *compiler,
                           struct si_shader_selector *gs_selector,
                           struct pipe_debug_callback *debug);
-int si_compile_tgsi_shader(struct si_screen *sscreen,
-                          struct ac_llvm_compiler *compiler,
-                          struct si_shader *shader,
-                          struct pipe_debug_callback *debug);
+int si_compile_shader(struct si_screen *sscreen,
+                     struct ac_llvm_compiler *compiler,
+                     struct si_shader *shader,
+                     struct pipe_debug_callback *debug);
 bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug);
index a9b40f41b4cd726df5dc85d56deb32c68552eff9..1ec74a84a696237adaabd84e0a2265c0405ba12f 100644 (file)
 #define SI_SHADER_PRIVATE_H
 
 #include "si_shader.h"
-#include "gallivm/lp_bld_flow.h"
-#include "gallivm/lp_bld_init.h"
-#include "gallivm/lp_bld_tgsi.h"
-#include "tgsi/tgsi_parse.h"
 #include "ac_shader_abi.h"
 
 #include <llvm-c/Core.h>
 
 struct pipe_debug_callback;
 
-#define RADEON_LLVM_MAX_INPUT_SLOTS 32
 #define RADEON_LLVM_MAX_INPUTS 32 * 4
-#define RADEON_LLVM_MAX_OUTPUTS 32 * 4
-
-#define RADEON_LLVM_MAX_SYSTEM_VALUES 11
-#define RADEON_LLVM_MAX_ADDRS 16
 
 struct si_shader_output_values {
        LLVMValueRef values[4];
@@ -52,8 +43,6 @@ struct si_shader_output_values {
 };
 
 struct si_shader_context {
-       struct lp_build_tgsi_context bld_base;
-       struct gallivm_state gallivm;
        struct ac_llvm_context ac;
        struct si_shader *shader;
        struct si_screen *screen;
@@ -69,42 +58,11 @@ struct si_shader_context {
        struct ac_shader_args args;
        struct ac_shader_abi abi;
 
-       /** This function is responsible for initilizing the inputs array and will be
-         * called once for each input declared in the TGSI shader.
-         */
-       void (*load_input)(struct si_shader_context *,
-                          unsigned input_index,
-                          const struct tgsi_full_declaration *decl,
-                          LLVMValueRef out[4]);
-
-       /** This array contains the input values for the shader.  Typically these
-         * values will be in the form of a target intrinsic that will inform the
-         * backend how to load the actual inputs to the shader.
-         */
-       struct tgsi_full_declaration input_decls[RADEON_LLVM_MAX_INPUT_SLOTS];
        LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
-       LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
-       LLVMValueRef addrs[RADEON_LLVM_MAX_ADDRS][TGSI_NUM_CHANNELS];
-
-       /** This pointer is used to contain the temporary values.
-         * The amount of temporary used in tgsi can't be bound to a max value and
-         * thus we must allocate this array at runtime.
-         */
-       LLVMValueRef *temps;
-       unsigned temps_count;
-       LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
-
-       LLVMValueRef *imms;
-       unsigned imms_num;
 
        LLVMBasicBlockRef merged_wrap_if_entry_block;
        int merged_wrap_if_label;
 
-       struct tgsi_array_info *temp_arrays;
-       LLVMValueRef *temp_array_allocas;
-
-       LLVMValueRef undef_alloca;
-
        LLVMValueRef main_fn;
        LLVMTypeRef return_type;
 
@@ -233,12 +191,6 @@ struct si_shader_context {
        LLVMValueRef i1true;
 };
 
-static inline struct si_shader_context *
-si_shader_context(struct lp_build_tgsi_context *bld_base)
-{
-       return (struct si_shader_context*)bld_base;
-}
-
 static inline struct si_shader_context *
 si_shader_context_from_abi(struct ac_shader_abi *abi)
 {
@@ -255,12 +207,6 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
                         struct pipe_debug_callback *debug,
                         bool less_optimized, unsigned wave_size);
 
-LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
-                         enum tgsi_opcode_type type);
-
-LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
-                    enum tgsi_opcode_type type, LLVMValueRef value);
-
 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
                                 LLVMValueRef index,
                                 unsigned num);
@@ -271,8 +217,7 @@ void si_llvm_context_init(struct si_shader_context *ctx,
                          unsigned wave_size,
                          unsigned ballot_mask_bits);
 void si_llvm_context_set_ir(struct si_shader_context *ctx,
-                           struct si_shader *shader,
-                           struct nir_shader *nir);
+                           struct si_shader *shader);
 
 void si_llvm_create_func(struct si_shader_context *ctx,
                         const char *name,
@@ -282,18 +227,6 @@ void si_llvm_dispose(struct si_shader_context *ctx);
 
 void si_llvm_optimize_module(struct si_shader_context *ctx);
 
-LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
-                                     LLVMTypeRef type,
-                                     LLVMValueRef ptr,
-                                     LLVMValueRef ptr2);
-
-LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
-                               const struct tgsi_full_src_register *reg,
-                               enum tgsi_opcode_type type,
-                               unsigned swizzle);
-
-void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible);
-
 LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
                                   LLVMTypeRef type,
                                   LLVMValueRef vertex_index,
@@ -306,34 +239,10 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
                                   bool is_patch,
                                   bool is_compact,
                                   bool load_input);
-
-LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
-                                  unsigned input_index,
-                                  unsigned vtx_offset_param,
-                                  LLVMTypeRef type,
-                                  unsigned swizzle);
-
 LLVMValueRef si_nir_lookup_interp_param(struct ac_shader_abi *abi,
                                        enum glsl_interp_mode interp,
                                        unsigned location);
-
-void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
-                       const struct tgsi_full_instruction *inst,
-                       const struct tgsi_opcode_info *info,
-                       unsigned index,
-                       LLVMValueRef dst[4]);
-
-LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
-                                  const struct tgsi_ind_register *ind,
-                                  unsigned addr_mul, int rel_index);
-LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
-                                          const struct tgsi_ind_register *ind,
-                                          int rel_index, unsigned num);
 LLVMValueRef si_get_sample_id(struct si_shader_context *ctx);
-
-void si_shader_context_init_alu(struct si_shader_context *ctx);
-void si_shader_context_init_mem(struct si_shader_context *ctx);
-
 LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
                                  LLVMValueRef list, LLVMValueRef index,
                                  enum ac_descriptor_type type);
@@ -342,14 +251,7 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
                                enum ac_descriptor_type desc_type,
                                bool uses_store, bool bindless);
 LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi);
-
-void si_load_system_value(struct si_shader_context *ctx,
-                         unsigned index,
-                         const struct tgsi_full_declaration *decl);
 void si_declare_compute_memory(struct si_shader_context *ctx);
-void si_tgsi_declare_compute_memory(struct si_shader_context *ctx,
-                                   const struct tgsi_full_declaration *decl);
-
 LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
                                 unsigned swizzle);
 void si_llvm_export_vs(struct si_shader_context *ctx,
@@ -365,10 +267,6 @@ void si_llvm_load_input_vs(
        struct si_shader_context *ctx,
        unsigned input_index,
        LLVMValueRef out[4]);
-void si_llvm_load_input_fs(
-       struct si_shader_context *ctx,
-       unsigned input_index,
-       LLVMValueRef out[4]);
 
 bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
new file mode 100644 (file)
index 0000000..64ceaf7
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_shader_internal.h"
+#include "si_pipe.h"
+#include "ac_llvm_util.h"
+#include "util/u_memory.h"
+/* State shared between si_llvm_compile() and si_diagnostic_handler(). */
+struct si_llvm_diagnostics {
+       struct pipe_debug_callback *debug; /* receives the forwarded LLVM messages */
+       unsigned retval; /* starts at 0; set to 1 once an error diagnostic is seen */
+};
+/**
+ * LLVM diagnostic callback installed by si_llvm_compile().
+ *
+ * Errors and warnings are forwarded to the pipe debug callback stored in the
+ * handler context. Remarks, notes and any other severity are dropped without
+ * being reported. An error additionally sets diag->retval to 1 and is printed
+ * to stderr.
+ *
+ * \p di is the diagnostic reported by LLVM; \p context is the
+ * struct si_llvm_diagnostics that was registered together with the handler.
+ */
+static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
+{
+       struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
+       LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
+       const char *severity_str = NULL;
+
+       switch (severity) {
+       case LLVMDSError:
+               severity_str = "error";
+               break;
+       case LLVMDSWarning:
+               severity_str = "warning";
+               break;
+       case LLVMDSRemark:
+       case LLVMDSNote:
+       default:
+               return; /* only errors and warnings are reported */
+       }
+
+       char *description = LLVMGetDiagInfoDescription(di);
+
+       pipe_debug_message(diag->debug, SHADER_INFO,
+                          "LLVM diagnostic (%s): %s", severity_str, description);
+
+       if (severity == LLVMDSError) {
+               diag->retval = 1; /* si_llvm_compile() returns this value */
+               fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
+       }
+
+       LLVMDisposeMessage(description); /* releases the string obtained above */
+}
+
+/**
+ * Compile an LLVM module to machine code.
+ *
+ * Pass selection: compiler->passes by default, compiler->passes_wave32 when
+ * \p wave_size is 32, and otherwise compiler->low_opt_passes when
+ * \p less_optimized is set and that pass set exists (wave32 takes precedence
+ * over less_optimized).
+ *
+ * binary->elf_buffer and binary->elf_size are handed to
+ * ac_compile_module_to_elf() as its outputs. LLVM errors and warnings raised
+ * on the module's context are routed to \p debug through
+ * si_diagnostic_handler().
+ *
+ * @returns 0 for success, 1 for failure
+ */
+unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
+                        struct ac_llvm_compiler *compiler,
+                        struct pipe_debug_callback *debug,
+                        bool less_optimized, unsigned wave_size)
+{
+       struct ac_compiler_passes *passes = compiler->passes;
+
+       if (wave_size == 32)
+               passes = compiler->passes_wave32;
+       else if (less_optimized && compiler->low_opt_passes)
+               passes = compiler->low_opt_passes;
+
+       struct si_llvm_diagnostics diag;
+       LLVMContextRef llvm_ctx;
+
+       diag.debug = debug;
+       diag.retval = 0;
+
+       /* Set up the diagnostic handler on the module's LLVM context. */
+       llvm_ctx = LLVMGetModuleContext(M);
+
+       LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
+
+       /* Compile IR. Either a failed compile or an error diagnostic reported
+        * through the handler leaves diag.retval at 1.
+        */
+       if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer,
+                                     &binary->elf_size))
+               diag.retval = 1;
+
+       if (diag.retval != 0)
+               pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
+       return diag.retval;
+}
+/* Free the ELF buffer and the LLVM IR string of \p binary and reset both
+ * pointers to NULL, so a repeated call only frees NULL pointers. */
+void si_shader_binary_clean(struct si_shader_binary *binary)
+{
+       free((void *)binary->elf_buffer);
+       binary->elf_buffer = NULL;
+
+       free(binary->llvm_ir_string);
+       binary->llvm_ir_string = NULL;
+}
+/**
+ * Reset \p ctx and prepare it for building LLVM IR.
+ *
+ * The whole context is zeroed first, so this has to run before anything else
+ * is stored in it. The function then records the screen and compiler,
+ * initializes ctx->ac through ac_llvm_context_init() (forwarding
+ * \p wave_size and \p ballot_mask_bits), and caches the LLVM types and the
+ * i32/i1 constants declared in the context.
+ */
+void si_llvm_context_init(struct si_shader_context *ctx,
+                         struct si_screen *sscreen,
+                         struct ac_llvm_compiler *compiler,
+                         unsigned wave_size,
+                         unsigned ballot_mask_bits)
+{
+       /* Wipe every field; anything not assigned below stays zero/NULL. */
+       memset(ctx, 0, sizeof(*ctx));
+       ctx->screen = sscreen;
+       ctx->compiler = compiler;
+
+       ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
+                            sscreen->info.family,
+                            AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
+                            wave_size, ballot_mask_bits);
+
+       ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
+       ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
+       ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
+       ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
+       ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
+       ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
+       ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
+       ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
+       ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+       ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
+       ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+
+       ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
+       ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
+       ctx->i1false = LLVMConstInt(ctx->i1, 0, 0);
+       ctx->i1true = LLVMConstInt(ctx->i1, 1, 0);
+}
+
+/* Bind the context to a certain shader. Can be called repeatedly
+ * to change the shader.
+ *
+ * Copies the shader type from the shader's selector and derives the number of
+ * constant buffers, shader buffers, samplers and images by applying
+ * util_last_bit() to the corresponding *_declared mask in the selector's info
+ * (which is still a struct tgsi_shader_info). */
+void si_llvm_context_set_ir(struct si_shader_context *ctx,
+                           struct si_shader *shader)
+{
+       struct si_shader_selector *sel = shader->selector;
+       const struct tgsi_shader_info *info = &sel->info;
+
+       ctx->shader = shader;
+       ctx->type = sel->type;
+
+       ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
+       ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
+
+       ctx->num_samplers = util_last_bit(info->samplers_declared);
+       ctx->num_images = util_last_bit(info->images_declared);
+}
+/**
+ * Create the main LLVM function of the shader currently bound to \p ctx.
+ *
+ * The return type is a packed struct built from the \p num_return_elems
+ * entries of \p return_types, or void when \p num_return_elems is 0. The
+ * calling convention follows the shader type, after accounting for merged
+ * shaders on GFX9 and newer. The results are stored in ctx->return_type and
+ * ctx->main_fn; \p name is passed on to ac_build_main().
+ */
+void si_llvm_create_func(struct si_shader_context *ctx,
+                        const char *name,
+                        LLVMTypeRef *return_types, unsigned num_return_elems)
+{
+       LLVMTypeRef ret_type;
+       enum ac_llvm_calling_convention call_conv;
+       enum pipe_shader_type real_shader_type;
+
+       if (num_return_elems)
+               ret_type = LLVMStructTypeInContext(ctx->ac.context,
+                                                  return_types,
+                                                  num_return_elems, true);
+       else
+               ret_type = ctx->voidt;
+
+       real_shader_type = ctx->type;
+
+       /* LS is merged into HS (TCS), and ES is merged into GS. Shaders
+        * compiled as NGG are given the GS calling convention as well.
+        */
+       if (ctx->screen->info.chip_class >= GFX9) {
+               if (ctx->shader->key.as_ls)
+                       real_shader_type = PIPE_SHADER_TESS_CTRL;
+               else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
+                       real_shader_type = PIPE_SHADER_GEOMETRY;
+       }
+
+       switch (real_shader_type) {
+       case PIPE_SHADER_VERTEX:
+       case PIPE_SHADER_TESS_EVAL:
+               call_conv = AC_LLVM_AMDGPU_VS;
+               break;
+       case PIPE_SHADER_TESS_CTRL:
+               call_conv = AC_LLVM_AMDGPU_HS;
+               break;
+       case PIPE_SHADER_GEOMETRY:
+               call_conv = AC_LLVM_AMDGPU_GS;
+               break;
+       case PIPE_SHADER_FRAGMENT:
+               call_conv = AC_LLVM_AMDGPU_PS;
+               break;
+       case PIPE_SHADER_COMPUTE:
+               call_conv = AC_LLVM_AMDGPU_CS;
+               break;
+       default:
+               unreachable("Unhandle shader type");
+       }
+
+       /* Set up the function. */
+       ctx->return_type = ret_type;
+       ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name,
+                                    ret_type, ctx->ac.module);
+}
+/**
+ * Run the compiler's pass manager over the module of \p ctx.
+ *
+ * The unoptimized IR is dumped first when the PREOPT_IR debug flag is set and
+ * si_can_dump_shader() allows it for this shader type. The IR builder of the
+ * context is disposed at the end, so it must not be used afterwards.
+ */
+void si_llvm_optimize_module(struct si_shader_context *ctx)
+{
+       /* Dump LLVM IR before any optimization passes */
+       if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
+           si_can_dump_shader(ctx->screen, ctx->type))
+               LLVMDumpModule(ctx->ac.module);
+
+       /* Run the pass */
+       LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
+       LLVMDisposeBuilder(ctx->ac.builder);
+}
+/* Dispose the LLVM module and the LLVM context held in ctx->ac, then hand
+ * ctx->ac to ac_llvm_context_dispose() for the remaining cleanup. */
+void si_llvm_dispose(struct si_shader_context *ctx)
+{
+       LLVMDisposeModule(ctx->ac.module);
+       LLVMContextDispose(ctx->ac.context);
+       ac_llvm_context_dispose(&ctx->ac);
+}
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_build.c b/src/gallium/drivers/radeonsi/si_shader_llvm_build.c
new file mode 100644 (file)
index 0000000..e362521
--- /dev/null
@@ -0,0 +1,219 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <llvm/Config/llvm-config.h>
+
+#include "si_shader_internal.h"
+#include "si_pipe.h"
+#include "sid.h"
+#include "ac_llvm_util.h"
+
+/**
+ * Return a value that is equal to the given i32 \p index if it lies in [0,num)
+ * or an undefined value in the same interval otherwise.
+ *
+ * When \p num passes util_is_power_of_two_or_zero() the index is masked with
+ * num - 1; otherwise it is clamped to num - 1 with an unsigned compare and
+ * select.
+ *
+ * NOTE(review): for num == 0, num - 1 wraps around and the mask appears to be
+ * all ones, i.e. the index would be returned unchanged - presumably callers
+ * never pass 0; confirm.
+ */
+LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
+                                LLVMValueRef index,
+                                unsigned num)
+{
+       LLVMBuilderRef builder = ctx->ac.builder;
+       LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
+       LLVMValueRef cc;
+
+       if (util_is_power_of_two_or_zero(num)) {
+               index = LLVMBuildAnd(builder, index, c_max, "");
+       } else {
+               /* In theory, this clamp-to-maximum pattern (an unsigned min of
+                * index and num - 1) should result in code that is as good as
+                * the bit-wise AND above.
+                *
+                * In practice, LLVM generates worse code (at the time of
+                * writing), because its value tracking is not strong enough.
+                */
+               cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
+               index = LLVMBuildSelect(builder, cc, index, c_max, "");
+       }
+
+       return index;
+}
+
+/**
+ * Given a 256-bit resource descriptor, force the DCC enable bit to off.
+ *
+ * At least on Tonga, executing image stores on images with DCC enabled and
+ * non-trivial can eventually lead to lockups. This can occur when an
+ * application binds an image as read-only but then uses a shader that writes
+ * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
+ * program termination) in this case, but it doesn't cost much to be a bit
+ * nicer: disabling DCC in the shader still leads to undefined results but
+ * avoids the lockup.
+ *
+ * On GFX7 and older the descriptor is returned unchanged. Otherwise element 6
+ * of \p rsrc is ANDed with C_008F28_COMPRESSION_EN (presumably the clear mask
+ * of that field) and the modified descriptor is returned.
+ */
+static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
+                                 LLVMValueRef rsrc)
+{
+       if (ctx->screen->info.chip_class <= GFX7) {
+               return rsrc;
+       } else {
+               LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
+               LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
+               LLVMValueRef tmp;
+
+               tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
+               tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
+               return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
+       }
+}
+
+/* Load an image, FMASK or buffer descriptor from \p list at \p index.
+ *
+ * AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
+ * adjust "index" to point to FMASK.
+ *
+ * For AC_DESC_BUFFER the index is turned into index * 2 + 1 and the list is
+ * reinterpreted as an array of v4i32. Bindless loads go through
+ * ac_build_load_to_sgpr_uint_wraparound(), all others through
+ * ac_build_load_to_sgpr(). An AC_DESC_IMAGE descriptor loaded with
+ * \p uses_store set is additionally passed through force_dcc_off(). */
+LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
+                               LLVMValueRef list, LLVMValueRef index,
+                               enum ac_descriptor_type desc_type,
+                               bool uses_store, bool bindless)
+{
+       LLVMBuilderRef builder = ctx->ac.builder;
+       LLVMValueRef rsrc;
+
+       if (desc_type == AC_DESC_BUFFER) {
+               index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
+                                     ctx->i32_1);
+               list = LLVMBuildPointerCast(builder, list,
+                                           ac_array_in_const32_addr_space(ctx->v4i32), "");
+       } else {
+               assert(desc_type == AC_DESC_IMAGE ||
+                      desc_type == AC_DESC_FMASK);
+       }
+
+       if (bindless)
+               rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
+       else
+               rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
+
+       if (desc_type == AC_DESC_IMAGE && uses_store)
+               rsrc = force_dcc_off(ctx, rsrc);
+       return rsrc;
+}
+
+/**
+ * Load an image view, fmask view, or sampler state descriptor.
+ *
+ * \p index selects a combined slot in \p list; it is rescaled here according
+ * to \p type so that it addresses the requested part of that slot. The
+ * bracketed ranges in the comments below are presumably dword offsets within
+ * one slot. For buffer and sampler state descriptors the list is
+ * reinterpreted as an array of v4i32 before loading.
+ */
+LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
+                                 LLVMValueRef list, LLVMValueRef index,
+                                 enum ac_descriptor_type type)
+{
+       LLVMBuilderRef builder = ctx->ac.builder;
+
+       switch (type) {
+       case AC_DESC_IMAGE:
+               /* The image is at [0:7]. */
+               index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
+               break;
+       case AC_DESC_BUFFER:
+               /* The buffer is in [4:7]. */
+               index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
+                                     ctx->i32_1);
+               list = LLVMBuildPointerCast(builder, list,
+                                           ac_array_in_const32_addr_space(ctx->v4i32), "");
+               break;
+       case AC_DESC_FMASK:
+               /* The FMASK is at [8:15]. */
+               index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
+                                     ctx->i32_1);
+               break;
+       case AC_DESC_SAMPLER:
+               /* The sampler state is at [12:15]. */
+               index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
+                                     LLVMConstInt(ctx->i32, 3, 0));
+               list = LLVMBuildPointerCast(builder, list,
+                                           ac_array_in_const32_addr_space(ctx->v4i32), "");
+               break;
+       case AC_DESC_PLANE_0:
+       case AC_DESC_PLANE_1:
+       case AC_DESC_PLANE_2:
+               /* Only used for the multiplane image support for Vulkan. Should
+                * never be reached in radeonsi.
+                */
+               unreachable("Plane descriptor requested in radeonsi.");
+       }
+
+       return ac_build_load_to_sgpr(&ctx->ac, list, index);
+}
+/**
+ * Build the image load that implements a framebuffer fetch.
+ *
+ * The color buffer 0 descriptor (and, for MSAA without the NO_FMASK debug
+ * flag, its FMASK descriptor) is loaded from the rw_buffers argument viewed
+ * as an array of v8i32. The coordinates are taken from pos_fixed_pt, the
+ * ancillary argument (layer) and the sample id, depending on the
+ * fbfetch_is_1D / fbfetch_layered / fbfetch_msaa bits of the PS key, which
+ * also select the image dimension. Returns the result of
+ * ac_build_image_opcode() for an ac_image_load with dmask 0xf.
+ *
+ * NOTE(review): the last two si_unpack_param() arguments look like a bit
+ * offset and a bit width - confirm against its definition.
+ */
+LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
+{
+       struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+       struct ac_image_args args = {};
+       LLVMValueRef ptr, image, fmask;
+
+       /* Ignore src0, because KHR_blend_func_extended disallows multiple render
+        * targets.
+        */
+
+       /* Load the image descriptor. */
+       STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
+       ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
+       ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
+                                  ac_array_in_const32_addr_space(ctx->v8i32), "");
+       image = ac_build_load_to_sgpr(&ctx->ac, ptr,
+                       LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
+
+       unsigned chan = 0; /* number of coordinates written to args.coords so far */
+
+       args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
+
+       if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
+               args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
+
+       /* Get the current render target layer index. */
+       if (ctx->shader->key.mono.u.ps.fbfetch_layered)
+               args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
+
+       if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
+               args.coords[chan++] = si_get_sample_id(ctx);
+
+       if (ctx->shader->key.mono.u.ps.fbfetch_msaa &&
+           !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
+               fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
+                       LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
+
+               ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
+                                        ctx->shader->key.mono.u.ps.fbfetch_layered);
+       }
+
+       args.opcode = ac_image_load;
+       args.resource = image;
+       args.dmask = 0xf;
+       args.attributes = AC_FUNC_ATTR_READNONE;
+
+       if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
+               args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+                       ac_image_2darraymsaa : ac_image_2dmsaa;
+       else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
+               args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+                       ac_image_1darray : ac_image_1d;
+       else
+               args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+                       ac_image_2darray : ac_image_2d;
+
+       return ac_build_image_opcode(&ctx->ac, &args);
+}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
deleted file mode 100644 (file)
index 4be410e..0000000
+++ /dev/null
@@ -1,834 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "si_shader_internal.h"
-#include "si_pipe.h"
-#include "ac_llvm_util.h"
-
-void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
-{
-       struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-       LLVMBuilderRef builder = ctx->ac.builder;
-
-       if (ctx->shader->selector->force_correct_derivs_after_kill) {
-               /* Kill immediately while maintaining WQM. */
-               ac_build_kill_if_false(&ctx->ac,
-                                      ac_build_wqm_vote(&ctx->ac, visible));
-
-               LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
-               mask = LLVMBuildAnd(builder, mask, visible, "");
-               LLVMBuildStore(builder, mask, ctx->postponed_kill);
-               return;
-       }
-
-       ac_build_kill_if_false(&ctx->ac, visible);
-}
-
-static void kil_emit(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef visible;
-
-       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
-               const struct tgsi_full_instruction *inst = emit_data->inst;
-               struct si_shader_context *ctx = si_shader_context(bld_base);
-               LLVMBuilderRef builder = ctx->ac.builder;
-               unsigned i;
-               LLVMValueRef conds[TGSI_NUM_CHANNELS];
-
-               for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
-                       LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
-                       /* UGE because NaN shouldn't get killed */
-                       conds[i] = LLVMBuildFCmp(builder, LLVMRealUGE, value,
-                                               ctx->ac.f32_0, "");
-               }
-
-               /* And the conditions together */
-               for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
-                       conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], "");
-               }
-               visible = conds[0];
-       } else {
-               assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
-               visible = ctx->i1false;
-       }
-
-       si_llvm_emit_kill(&ctx->abi, visible);
-}
-
-static void emit_icmp(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       unsigned pred;
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       switch (emit_data->inst->Instruction.Opcode) {
-       case TGSI_OPCODE_USEQ:
-       case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
-       case TGSI_OPCODE_USNE:
-       case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
-       case TGSI_OPCODE_USGE:
-       case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
-       case TGSI_OPCODE_USLT:
-       case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
-       case TGSI_OPCODE_ISGE:
-       case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
-       case TGSI_OPCODE_ISLT:
-       case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
-       default:
-               assert(!"unknown instruction");
-               pred = 0;
-               break;
-       }
-
-       LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, pred,
-                       emit_data->args[0], emit_data->args[1],"");
-
-       v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
-
-       emit_data->output[emit_data->chan] = v;
-}
-
-static void emit_ucmp(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef arg0 = ac_to_integer(&ctx->ac, emit_data->args[0]);
-
-       LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, arg0,
-                                      ctx->i32_0, "");
-
-       emit_data->output[emit_data->chan] =
-               LLVMBuildSelect(ctx->ac.builder, v, emit_data->args[1], emit_data->args[2], "");
-}
-
-static void emit_cmp(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef cond, *args = emit_data->args;
-
-       cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealOLT, args[0],
-                            ctx->ac.f32_0, "");
-
-       emit_data->output[emit_data->chan] =
-               LLVMBuildSelect(ctx->ac.builder, cond, args[1], args[2], "");
-}
-
-static void emit_set_cond(const struct lp_build_tgsi_action *action,
-                         struct lp_build_tgsi_context *bld_base,
-                         struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMRealPredicate pred;
-       LLVMValueRef cond;
-
-       /* Use ordered for everything but NE (which is usual for
-        * float comparisons)
-        */
-       switch (emit_data->inst->Instruction.Opcode) {
-       case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
-       case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
-       case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
-       case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
-       case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
-       case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
-       default: assert(!"unknown instruction"); pred = 0; break;
-       }
-
-       cond = LLVMBuildFCmp(ctx->ac.builder,
-               pred, emit_data->args[0], emit_data->args[1], "");
-
-       emit_data->output[emit_data->chan] = LLVMBuildSelect(ctx->ac.builder,
-               cond, ctx->ac.f32_1, ctx->ac.f32_0, "");
-}
-
-static void emit_fcmp(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMRealPredicate pred;
-
-       /* Use ordered for everything but NE (which is usual for
-        * float comparisons)
-        */
-       switch (emit_data->inst->Instruction.Opcode) {
-       case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
-       case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
-       case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
-       case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
-       default: assert(!"unknown instruction"); pred = 0; break;
-       }
-
-       LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
-                       emit_data->args[0], emit_data->args[1],"");
-
-       v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
-
-       emit_data->output[emit_data->chan] = v;
-}
-
-static void emit_dcmp(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMRealPredicate pred;
-
-       /* Use ordered for everything but NE (which is usual for
-        * float comparisons)
-        */
-       switch (emit_data->inst->Instruction.Opcode) {
-       case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
-       case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
-       case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
-       case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
-       default: assert(!"unknown instruction"); pred = 0; break;
-       }
-
-       LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
-                       emit_data->args[0], emit_data->args[1],"");
-
-       v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
-
-       emit_data->output[emit_data->chan] = v;
-}
-
-static void emit_not(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef v = ac_to_integer(&ctx->ac, emit_data->args[0]);
-       emit_data->output[emit_data->chan] = LLVMBuildNot(ctx->ac.builder, v, "");
-}
-
-static void emit_arl(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef floor_index =
-               ac_build_intrinsic(&ctx->ac, "llvm.floor.f32", ctx->f32,
-                                  &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE);
-       emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
-                       floor_index, ctx->i32, "");
-}
-
-static void emit_and(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildAnd(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_or(const struct lp_build_tgsi_action *action,
-                   struct lp_build_tgsi_context *bld_base,
-                   struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildOr(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_uadd(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildAdd(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_udiv(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildUDiv(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_idiv(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildSDiv(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_mod(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildSRem(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_umod(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildURem(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_shl(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildShl(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_ushr(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildLShr(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-static void emit_ishr(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildAShr(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_xor(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildXor(ctx->ac.builder,
-                       emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_ssg(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       LLVMValueRef  val;
-
-       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
-               val = ac_build_isign(&ctx->ac, emit_data->args[0], 64);
-       } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
-               val = ac_build_isign(&ctx->ac, emit_data->args[0], 32);
-       } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_DSSG) {
-               val = ac_build_fsign(&ctx->ac, emit_data->args[0], 64);
-       } else {
-               val = ac_build_fsign(&ctx->ac, emit_data->args[0], 32);
-       }
-
-       emit_data->output[emit_data->chan] = val;
-}
-
-static void emit_ineg(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildNeg(ctx->ac.builder,
-                       emit_data->args[0], "");
-}
-
-static void emit_dneg(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildFNeg(ctx->ac.builder,
-                       emit_data->args[0], "");
-}
-
-static void emit_frac(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       unsigned bitsize;
-
-       if (emit_data->info->opcode == TGSI_OPCODE_FRC)
-               bitsize = 32;
-       else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
-               bitsize = 64;
-       else {
-               assert(0);
-               return;
-       }
-
-       emit_data->output[emit_data->chan] =
-               ac_build_fract(&ctx->ac, emit_data->args[0], bitsize);
-}
-
-static void emit_f2i(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
-                       emit_data->args[0], ctx->i32, "");
-}
-
-static void emit_f2u(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildFPToUI(ctx->ac.builder,
-                       emit_data->args[0], ctx->i32, "");
-}
-
-static void emit_i2f(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildSIToFP(ctx->ac.builder,
-                       emit_data->args[0], ctx->f32, "");
-}
-
-static void emit_u2f(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] = LLVMBuildUIToFP(ctx->ac.builder,
-                       emit_data->args[0], ctx->f32, "");
-}
-
-static void
-build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
-                          struct lp_build_tgsi_context *bld_base,
-                          struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] =
-               ac_build_intrinsic(&ctx->ac, action->intr_name,
-                                  emit_data->dst_type, emit_data->args,
-                                  emit_data->arg_count, AC_FUNC_ATTR_READNONE);
-}
-
-static void emit_bfi(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef bfi_args[3];
-       LLVMValueRef bfi_sm5;
-       LLVMValueRef cond;
-
-       // Calculate the bitmask: (((1 << src3) - 1) << src2
-       bfi_args[0] = LLVMBuildShl(builder,
-                                  LLVMBuildSub(builder,
-                                               LLVMBuildShl(builder,
-                                                            ctx->i32_1,
-                                                            emit_data->args[3], ""),
-                                               ctx->i32_1, ""),
-                                  emit_data->args[2], "");
-
-       bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
-                                  emit_data->args[2], "");
-
-       bfi_args[2] = emit_data->args[0];
-
-       /* Calculate:
-        *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
-        * Use the right-hand side, which the LLVM backend can convert to V_BFI.
-        */
-       bfi_sm5 =
-               LLVMBuildXor(builder, bfi_args[2],
-                       LLVMBuildAnd(builder, bfi_args[0],
-                               LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
-                                            ""), ""), "");
-
-       /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
-        * uses the convenient V_BFI lowering for the above, which follows SM5
-        * and disagrees with GLSL semantics when bits (src3) is 32.
-        */
-       cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
-                            LLVMConstInt(ctx->i32, 32, 0), "");
-       emit_data->output[emit_data->chan] =
-               LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
-}
-
-static void emit_bfe(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       /* FIXME: LLVM 7 returns incorrect result when count is 0.
-        * https://bugs.freedesktop.org/show_bug.cgi?id=107276
-        */
-       LLVMValueRef zero = ctx->i32_0;
-       LLVMValueRef bfe_sm5 =
-               ac_build_bfe(&ctx->ac, emit_data->args[0],
-                            emit_data->args[1], emit_data->args[2],
-                            emit_data->info->opcode == TGSI_OPCODE_IBFE);
-
-       /* Correct for GLSL semantics. */
-       LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
-                                         LLVMConstInt(ctx->i32, 32, 0), "");
-       LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2],
-                                          zero, "");
-       bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
-       emit_data->output[emit_data->chan] =
-               LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, "");
-}
-
-/* this is ffs in C */
-static void emit_lsb(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       emit_data->output[emit_data->chan] = ac_find_lsb(&ctx->ac, emit_data->dst_type, emit_data->args[0]);
-}
-
-/* Find the last bit set. */
-static void emit_umsb(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       emit_data->output[emit_data->chan] =
-               ac_build_umsb(&ctx->ac, emit_data->args[0], emit_data->dst_type);
-}
-
-/* Find the last bit opposite of the sign bit. */
-static void emit_imsb(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       emit_data->output[emit_data->chan] =
-               ac_build_imsb(&ctx->ac, emit_data->args[0],
-                             emit_data->dst_type);
-}
-
-static void emit_iabs(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       emit_data->output[emit_data->chan] =
-               ac_build_imax(&ctx->ac,  emit_data->args[0],
-                             LLVMBuildNeg(ctx->ac.builder, emit_data->args[0], ""));
-}
-
-static void emit_minmax_int(const struct lp_build_tgsi_action *action,
-                           struct lp_build_tgsi_context *bld_base,
-                           struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMIntPredicate op;
-
-       switch (emit_data->info->opcode) {
-       default:
-               assert(0);
-       case TGSI_OPCODE_IMAX:
-       case TGSI_OPCODE_I64MAX:
-               op = LLVMIntSGT;
-               break;
-       case TGSI_OPCODE_IMIN:
-       case TGSI_OPCODE_I64MIN:
-               op = LLVMIntSLT;
-               break;
-       case TGSI_OPCODE_UMAX:
-       case TGSI_OPCODE_U64MAX:
-               op = LLVMIntUGT;
-               break;
-       case TGSI_OPCODE_UMIN:
-       case TGSI_OPCODE_U64MIN:
-               op = LLVMIntULT;
-               break;
-       }
-
-       emit_data->output[emit_data->chan] =
-               LLVMBuildSelect(ctx->ac.builder,
-                               LLVMBuildICmp(ctx->ac.builder, op, emit_data->args[0],
-                                             emit_data->args[1], ""),
-                               emit_data->args[0],
-                               emit_data->args[1], "");
-}
-
-static void emit_pk2h(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef v[] = {
-               lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X),
-               lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y),
-       };
-
-
-       /* From the GLSL 4.50 spec:
-        *   "The rounding mode cannot be set and is undefined."
-        *
-        * v_cvt_pkrtz_f16 rounds to zero, but it's fastest.
-        */
-       emit_data->output[emit_data->chan] =
-               LLVMBuildBitCast(ctx->ac.builder, ac_build_cvt_pkrtz_f16(&ctx->ac, v),
-                                ctx->i32, "");
-}
-
-static void emit_up2h(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMTypeRef i16;
-       LLVMValueRef const16, input, val;
-       unsigned i;
-
-       i16 = LLVMInt16TypeInContext(ctx->ac.context);
-       const16 = LLVMConstInt(ctx->i32, 16, 0);
-       input = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
-
-       for (i = 0; i < 2; i++) {
-               val = i == 1 ? LLVMBuildLShr(ctx->ac.builder, input, const16, "") : input;
-               val = LLVMBuildTrunc(ctx->ac.builder, val, i16, "");
-               val = ac_to_float(&ctx->ac, val);
-               emit_data->output[i] = LLVMBuildFPExt(ctx->ac.builder, val, ctx->f32, "");
-       }
-}
-
-static void emit_fdiv(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       emit_data->output[emit_data->chan] =
-               ac_build_fdiv(&ctx->ac, emit_data->args[0], emit_data->args[1]);
-}
-
-/* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
- * the target machine. f64 needs global unsafe math flags to get rsq. */
-static void emit_rsq(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       LLVMValueRef sqrt =
-               ac_build_intrinsic(&ctx->ac, "llvm.sqrt.f32", ctx->f32,
-                                  &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE);
-
-       emit_data->output[emit_data->chan] =
-               ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, sqrt);
-}
-
-static void dfracexp_emit(const struct lp_build_tgsi_action *action,
-                         struct lp_build_tgsi_context *bld_base,
-                         struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef in = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
-
-       emit_data->output[emit_data->chan] =
-               ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.mant.f64",
-                                  ctx->ac.f64, &in, 1, 0);
-       emit_data->output1[emit_data->chan] =
-               ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.exp.i32.f64",
-                                  ctx->ac.i32, &in, 1, 0);
-}
-
-void si_shader_context_init_alu(struct si_shader_context *ctx)
-{
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
-       lp_set_default_actions(bld_base);
-
-       bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
-       bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
-       bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
-       bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.bitreverse.i32";
-       bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
-       bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
-       bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
-       bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
-       bld_base->op_actions[TGSI_OPCODE_DCEIL].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_DCEIL].intr_name = "llvm.ceil.f64";
-       bld_base->op_actions[TGSI_OPCODE_DFLR].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_DFLR].intr_name = "llvm.floor.f64";
-       bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
-       bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
-       bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
-       bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
-       bld_base->op_actions[TGSI_OPCODE_DROUND].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_DROUND].intr_name = "llvm.rint.f64";
-       bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
-       bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
-       bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
-       bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
-       bld_base->op_actions[TGSI_OPCODE_DSSG].emit = emit_ssg;
-       bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.amdgcn.rsq.f64";
-       bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
-       bld_base->op_actions[TGSI_OPCODE_DTRUNC].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_DTRUNC].intr_name = "llvm.trunc.f64";
-       bld_base->op_actions[TGSI_OPCODE_DFRACEXP].emit = dfracexp_emit;
-       bld_base->op_actions[TGSI_OPCODE_DLDEXP].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_DLDEXP].intr_name = "llvm.amdgcn.ldexp.f64";
-       bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
-       bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
-
-       /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
-       if (ctx->screen->info.chip_class >= GFX10) {
-               bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
-               bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
-       } else {
-               bld_base->op_actions[TGSI_OPCODE_FMA].emit =
-                       bld_base->op_actions[TGSI_OPCODE_MAD].emit;
-       }
-
-       bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
-       bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
-       bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
-       bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
-       bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
-       bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
-       bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
-       bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
-       bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
-       bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
-       bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
-       bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
-       bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
-       bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
-       bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
-       bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
-       bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
-       bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
-       bld_base->op_actions[TGSI_OPCODE_KILL].emit = kil_emit;
-       bld_base->op_actions[TGSI_OPCODE_LDEXP].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_LDEXP].intr_name = "llvm.amdgcn.ldexp.f32";
-       bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
-       bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
-       bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
-       bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
-       bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
-       bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
-       bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
-       bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
-       bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
-       bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
-       bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
-       bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
-       bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
-       bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
-       bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
-       bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
-       bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
-       bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
-       bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
-       bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
-       bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
-       bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
-       bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
-       bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
-       bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
-       bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
-       bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
-       bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
-       bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
-       bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
-       bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
-       bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
-       bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
-       bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
-       bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
-       bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
-
-       bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
-       bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
-       bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
-       bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
-       bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
-       bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
-       bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;
-
-       bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
-       bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;
-
-       bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
-       bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
-       bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
-       bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
-
-       bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
-       bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
-       bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
-       bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
-}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
deleted file mode 100644 (file)
index 21b861b..0000000
+++ /dev/null
@@ -1,1852 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <llvm/Config/llvm-config.h>
-
-#include "si_shader_internal.h"
-#include "si_pipe.h"
-#include "sid.h"
-#include "tgsi/tgsi_build.h"
-#include "tgsi/tgsi_util.h"
-#include "ac_llvm_util.h"
-
-static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
-                          struct lp_build_emit_data *emit_data,
-                          LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
-                          LLVMValueRef *fmask_ptr);
-
-/**
- * Given a v8i32 resource descriptor for a buffer, extract the size of the
- * buffer in number of elements and return it as an i32.
- */
-static LLVMValueRef get_buffer_size(
-       struct lp_build_tgsi_context *bld_base,
-       LLVMValueRef descriptor)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef size =
-               LLVMBuildExtractElement(builder, descriptor,
-                                       LLVMConstInt(ctx->i32, 2, 0), "");
-
-       if (ctx->screen->info.chip_class == GFX8) {
-               /* On GFX8, the descriptor contains the size in bytes,
-                * but TXQ must return the size in elements.
-                * The stride is always non-zero for resources using TXQ.
-                */
-               LLVMValueRef stride =
-                       LLVMBuildExtractElement(builder, descriptor,
-                                               ctx->i32_1, "");
-               stride = LLVMBuildLShr(builder, stride,
-                                      LLVMConstInt(ctx->i32, 16, 0), "");
-               stride = LLVMBuildAnd(builder, stride,
-                                     LLVMConstInt(ctx->i32, 0x3FFF, 0), "");
-
-               size = LLVMBuildUDiv(builder, size, stride, "");
-       }
-
-       return size;
-}
-
-static LLVMValueRef
-shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
-                        const struct tgsi_full_src_register *reg,
-                        bool ubo)
-{
-       LLVMValueRef index;
-
-       if (!reg->Register.Indirect) {
-               index = LLVMConstInt(ctx->i32, reg->Register.Index, false);
-       } else {
-               index = si_get_indirect_index(ctx, &reg->Indirect,
-                                             1, reg->Register.Index);
-       }
-
-       if (ubo)
-               return ctx->abi.load_ubo(&ctx->abi, index);
-       else
-               return ctx->abi.load_ssbo(&ctx->abi, index, false);
-}
-
-static enum ac_image_dim
-ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
-{
-       switch (target) {
-       case TGSI_TEXTURE_1D:
-       case TGSI_TEXTURE_SHADOW1D:
-               if (screen->info.chip_class == GFX9)
-                       return ac_image_2d;
-               return ac_image_1d;
-       case TGSI_TEXTURE_2D:
-       case TGSI_TEXTURE_SHADOW2D:
-       case TGSI_TEXTURE_RECT:
-       case TGSI_TEXTURE_SHADOWRECT:
-               return ac_image_2d;
-       case TGSI_TEXTURE_3D:
-               return ac_image_3d;
-       case TGSI_TEXTURE_CUBE:
-       case TGSI_TEXTURE_SHADOWCUBE:
-       case TGSI_TEXTURE_CUBE_ARRAY:
-       case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
-               return ac_image_cube;
-       case TGSI_TEXTURE_1D_ARRAY:
-       case TGSI_TEXTURE_SHADOW1D_ARRAY:
-               if (screen->info.chip_class == GFX9)
-                       return ac_image_2darray;
-               return ac_image_1darray;
-       case TGSI_TEXTURE_2D_ARRAY:
-       case TGSI_TEXTURE_SHADOW2D_ARRAY:
-               return ac_image_2darray;
-       case TGSI_TEXTURE_2D_MSAA:
-               return ac_image_2dmsaa;
-       case TGSI_TEXTURE_2D_ARRAY_MSAA:
-               return ac_image_2darraymsaa;
-       default:
-               unreachable("unhandled texture type");
-       }
-}
-
-static enum ac_image_dim
-ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
-{
-       enum ac_image_dim dim = ac_texture_dim_from_tgsi_target(screen, target);
-
-       /* Match the resource type set in the descriptor. */
-       if (dim == ac_image_cube ||
-           (screen->info.chip_class <= GFX8 && dim == ac_image_3d))
-               dim = ac_image_2darray;
-       else if (target == TGSI_TEXTURE_2D && screen->info.chip_class == GFX9) {
-               /* When a single layer of a 3D texture is bound, the shader
-                * will refer to a 2D target, but the descriptor has a 3D type.
-                * Since the HW ignores BASE_ARRAY in this case, we need to
-                * send 3 coordinates. This doesn't hurt when the underlying
-                * texture is non-3D.
-                */
-               dim = ac_image_3d;
-       }
-
-       return dim;
-}
-
-/**
- * Given a 256-bit resource descriptor, force the DCC enable bit to off.
- *
- * At least on Tonga, executing image stores on images with DCC enabled and
- * non-trivial can eventually lead to lockups. This can occur when an
- * application binds an image as read-only but then uses a shader that writes
- * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
- * program termination) in this case, but it doesn't cost much to be a bit
- * nicer: disabling DCC in the shader still leads to undefined results but
- * avoids the lockup.
- */
-static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
-                                 LLVMValueRef rsrc)
-{
-       if (ctx->screen->info.chip_class <= GFX7) {
-               return rsrc;
-       } else {
-               LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
-               LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
-               LLVMValueRef tmp;
-
-               tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
-               tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
-               return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
-       }
-}
-
-/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
- * adjust "index" to point to FMASK. */
-LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
-                               LLVMValueRef list, LLVMValueRef index,
-                               enum ac_descriptor_type desc_type,
-                               bool uses_store, bool bindless)
-{
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef rsrc;
-
-       if (desc_type == AC_DESC_BUFFER) {
-               index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
-                                     ctx->i32_1);
-               list = LLVMBuildPointerCast(builder, list,
-                                           ac_array_in_const32_addr_space(ctx->v4i32), "");
-       } else {
-               assert(desc_type == AC_DESC_IMAGE ||
-                      desc_type == AC_DESC_FMASK);
-       }
-
-       if (bindless)
-               rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
-       else
-               rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
-
-       if (desc_type == AC_DESC_IMAGE && uses_store)
-               rsrc = force_dcc_off(ctx, rsrc);
-       return rsrc;
-}
-
-/**
- * Load the resource descriptor for \p image.
- */
-static void
-image_fetch_rsrc(
-       struct lp_build_tgsi_context *bld_base,
-       const struct tgsi_full_src_register *image,
-       bool fmask, bool is_store, unsigned target,
-       LLVMValueRef *rsrc)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       bool bindless = image->Register.File != TGSI_FILE_IMAGE;
-       LLVMValueRef rsrc_ptr, index;
-
-       if (bindless) {
-               /* Bindless descriptors are accessible from a different pair of
-                * user SGPR indices.
-                */
-               rsrc_ptr = ac_get_arg(&ctx->ac,
-                                     ctx->bindless_samplers_and_images);
-               index = lp_build_emit_fetch_src(bld_base, image, TGSI_TYPE_UNSIGNED, 0);
-
-               /* Bindless image descriptors use 16-dword slots. */
-               index = LLVMBuildMul(ctx->ac.builder, index,
-                                    LLVMConstInt(ctx->i32, 2, 0), "");
-               /* FMASK is right after the image. */
-               if (fmask)
-                       index = LLVMBuildAdd(ctx->ac.builder, index, ctx->i32_1, "");
-       } else {
-               rsrc_ptr = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
-
-               if (!image->Register.Indirect) {
-                       index = LLVMConstInt(ctx->i32, image->Register.Index, 0);
-               } else {
-                       /* From the GL_ARB_shader_image_load_store extension spec:
-                        *
-                        *    If a shader performs an image load, store, or atomic
-                        *    operation using an image variable declared as an array,
-                        *    and if the index used to select an individual element is
-                        *    negative or greater than or equal to the size of the
-                        *    array, the results of the operation are undefined but may
-                        *    not lead to termination.
-                        */
-                       index = si_get_bounded_indirect_index(ctx, &image->Indirect,
-                                                             image->Register.Index,
-                                                             ctx->num_images);
-               }
-               /* FMASKs are separate from images. */
-               if (fmask) {
-                       index = LLVMBuildAdd(ctx->ac.builder, index,
-                                            LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), "");
-               }
-               index = LLVMBuildSub(ctx->ac.builder,
-                                    LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
-                                    index, "");
-       }
-
-       *rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
-                                  fmask ? AC_DESC_FMASK :
-                                  target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
-                                  is_store, bindless);
-}
-
-static void image_fetch_coords(
-               struct lp_build_tgsi_context *bld_base,
-               const struct tgsi_full_instruction *inst,
-               unsigned src, LLVMValueRef desc,
-               LLVMValueRef *coords)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       unsigned target = inst->Memory.Texture;
-       unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
-       LLVMValueRef tmp;
-       int chan;
-
-       for (chan = 0; chan < num_coords; ++chan) {
-               tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
-               tmp = ac_to_integer(&ctx->ac, tmp);
-               coords[chan] = tmp;
-       }
-
-       if (target == TGSI_TEXTURE_2D_MSAA ||
-           target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
-               /* Need the sample index as well. */
-               tmp = lp_build_emit_fetch(bld_base, inst, src, TGSI_SWIZZLE_W);
-               coords[chan] = ac_to_integer(&ctx->ac, tmp);
-       }
-
-       if (ctx->screen->info.chip_class == GFX9) {
-               /* 1D textures are allocated and used as 2D on GFX9. */
-               if (target == TGSI_TEXTURE_1D) {
-                       coords[1] = ctx->i32_0;
-               } else if (target == TGSI_TEXTURE_1D_ARRAY) {
-                       coords[2] = coords[1];
-                       coords[1] = ctx->i32_0;
-               } else if (target == TGSI_TEXTURE_2D) {
-                       /* The hw can't bind a slice of a 3D image as a 2D
-                        * image, because it ignores BASE_ARRAY if the target
-                        * is 3D. The workaround is to read BASE_ARRAY and set
-                        * it as the 3rd address operand for all 2D images.
-                        */
-                       LLVMValueRef first_layer, const5, mask;
-
-                       const5 = LLVMConstInt(ctx->i32, 5, 0);
-                       mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
-                       first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
-                       first_layer = LLVMBuildAnd(builder, first_layer, mask, "");
-
-                       coords[2] = first_layer;
-               }
-       }
-}
-
-static unsigned get_cache_policy(struct si_shader_context *ctx,
-                                const struct tgsi_full_instruction *inst,
-                                bool atomic, bool may_store_unaligned,
-                                bool writeonly_memory)
-{
-       unsigned cache_policy = 0;
-
-       if (!atomic &&
-           /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores.
-            * All store opcodes not aligned to a dword are affected.
-            * The only way to get unaligned stores in radeonsi is through
-            * shader images. */
-           ((may_store_unaligned && ctx->screen->info.chip_class == GFX6) ||
-            /* If this is write-only, don't keep data in L1 to prevent
-             * evicting L1 cache lines that may be needed by other
-             * instructions. */
-            writeonly_memory ||
-            inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))) {
-               cache_policy |= ac_glc;
-       }
-
-       if (inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
-               cache_policy |= ac_slc;
-
-       return cache_policy;
-}
-
-static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
-                                   const struct tgsi_full_instruction *inst,
-                                   LLVMTypeRef type, int arg)
-{
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef offset, ptr;
-       int addr_space;
-
-       offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0);
-       offset = ac_to_integer(&ctx->ac, offset);
-
-       ptr = ctx->ac.lds;
-       ptr = LLVMBuildGEP(builder, ptr, &offset, 1, "");
-       addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
-       ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), "");
-
-       return ptr;
-}
-
-static void load_emit_memory(
-               struct si_shader_context *ctx,
-               struct lp_build_emit_data *emit_data)
-{
-       const struct tgsi_full_instruction *inst = emit_data->inst;
-       unsigned writemask = inst->Dst[0].Register.WriteMask;
-       LLVMValueRef channels[4], ptr, derived_ptr, index;
-       int chan;
-
-       ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);
-
-       for (chan = 0; chan < 4; ++chan) {
-               if (!(writemask & (1 << chan))) {
-                       channels[chan] = LLVMGetUndef(ctx->f32);
-                       continue;
-               }
-
-               index = LLVMConstInt(ctx->i32, chan, 0);
-               derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
-               channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
-       }
-       emit_data->output[emit_data->chan] = ac_build_gather_values(&ctx->ac, channels, 4);
-}
-
-/**
- * Return true if the memory accessed by a LOAD or STORE instruction is
- * read-only or write-only, respectively.
- *
- * \param shader_buffers_reverse_access_mask
- *     For LOAD, set this to (store | atomic) slot usage in the shader.
- *     For STORE, set this to (load | atomic) slot usage in the shader.
- * \param images_reverse_access_mask  Same as above, but for images.
- * \param bindless_buffer_reverse_access_mask  Same as above, but for bindless image buffers.
- * \param bindless_image_reverse_access_mask   Same as above, but for bindless images.
- */
-static bool is_oneway_access_only(const struct tgsi_full_instruction *inst,
-                                 const struct tgsi_shader_info *info,
-                                 unsigned shader_buffers_reverse_access_mask,
-                                 unsigned images_reverse_access_mask,
-                                 bool bindless_buffer_reverse_access_mask,
-                                 bool bindless_image_reverse_access_mask)
-{
-       enum tgsi_file_type resource_file;
-       unsigned resource_index;
-       bool resource_indirect;
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_STORE) {
-               resource_file = inst->Dst[0].Register.File;
-               resource_index = inst->Dst[0].Register.Index;
-               resource_indirect = inst->Dst[0].Register.Indirect;
-       } else {
-               resource_file = inst->Src[0].Register.File;
-               resource_index = inst->Src[0].Register.Index;
-               resource_indirect = inst->Src[0].Register.Indirect;
-       }
-
-       assert(resource_file == TGSI_FILE_BUFFER ||
-              resource_file == TGSI_FILE_IMAGE ||
-              /* bindless image */
-              resource_file == TGSI_FILE_INPUT ||
-              resource_file == TGSI_FILE_OUTPUT ||
-              resource_file == TGSI_FILE_CONSTANT ||
-              resource_file == TGSI_FILE_TEMPORARY ||
-              resource_file == TGSI_FILE_IMMEDIATE);
-
-       assert(resource_file != TGSI_FILE_BUFFER ||
-              inst->Memory.Texture == TGSI_TEXTURE_BUFFER);
-
-       bool bindless = resource_file != TGSI_FILE_BUFFER &&
-                       resource_file != TGSI_FILE_IMAGE;
-
-       /* RESTRICT means NOALIAS.
-        * If there are no writes, we can assume the accessed memory is read-only.
-        * If there are no reads, we can assume the accessed memory is write-only.
-        */
-       if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT && !bindless) {
-               unsigned reverse_access_mask;
-
-               if (resource_file == TGSI_FILE_BUFFER) {
-                       reverse_access_mask = shader_buffers_reverse_access_mask;
-               } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
-                       reverse_access_mask = info->images_buffers &
-                                             images_reverse_access_mask;
-               } else {
-                       reverse_access_mask = ~info->images_buffers &
-                                             images_reverse_access_mask;
-               }
-
-               if (resource_indirect) {
-                       if (!reverse_access_mask)
-                               return true;
-               } else {
-                       if (!(reverse_access_mask &
-                             (1u << resource_index)))
-                               return true;
-               }
-       }
-
-       /* If there are no buffer writes (for both shader buffers & image
-        * buffers), it implies that buffer memory is read-only.
-        * If there are no buffer reads (for both shader buffers & image
-        * buffers), it implies that buffer memory is write-only.
-        *
-        * Same for the case when there are no writes/reads for non-buffer
-        * images.
-        */
-       if (resource_file == TGSI_FILE_BUFFER ||
-           inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
-               if (!shader_buffers_reverse_access_mask &&
-                   !(info->images_buffers & images_reverse_access_mask) &&
-                   !bindless_buffer_reverse_access_mask)
-                       return true;
-       } else {
-               if (!(~info->images_buffers & images_reverse_access_mask) &&
-                   !bindless_image_reverse_access_mask)
-                       return true;
-       }
-       return false;
-}
-
-static void load_emit(
-               const struct lp_build_tgsi_action *action,
-               struct lp_build_tgsi_context *bld_base,
-               struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       const struct tgsi_full_instruction * inst = emit_data->inst;
-       const struct tgsi_shader_info *info = &ctx->shader->selector->info;
-       bool can_speculate = false;
-       LLVMValueRef vindex = ctx->i32_0;
-       LLVMValueRef voffset = ctx->i32_0;
-       struct ac_image_args args = {};
-
-       if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
-               load_emit_memory(ctx, emit_data);
-               return;
-       }
-
-       if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
-           inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
-               bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
-               args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);
-               voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0));
-       } else {
-               unsigned target = inst->Memory.Texture;
-
-               image_fetch_rsrc(bld_base, &inst->Src[0], false, false, target, &args.resource);
-               image_fetch_coords(bld_base, inst, 1, args.resource, args.coords);
-
-               if ((inst->Memory.Texture == TGSI_TEXTURE_2D_MSAA ||
-                    inst->Memory.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) &&
-                   !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
-                       LLVMValueRef fmask;
-
-                       image_fetch_rsrc(bld_base, &inst->Src[0], true, false, target, &fmask);
-                       ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
-                                                inst->Memory.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
-               }
-               vindex = args.coords[0]; /* for buffers only */
-       }
-
-       if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
-               emit_data->output[emit_data->chan] =
-                       ac_build_buffer_load(&ctx->ac, args.resource,
-                                            util_last_bit(inst->Dst[0].Register.WriteMask),
-                                            NULL, voffset, NULL, 0, 0, true, true);
-               return;
-       }
-
-       if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
-               ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
-
-       can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
-                         is_oneway_access_only(inst, info,
-                                               info->shader_buffers_store |
-                                               info->shader_buffers_atomic,
-                                               info->images_store |
-                                               info->images_atomic,
-                                               info->uses_bindless_buffer_store |
-                                               info->uses_bindless_buffer_atomic,
-                                               info->uses_bindless_image_store |
-                                               info->uses_bindless_image_atomic);
-       args.cache_policy = get_cache_policy(ctx, inst, false, false, false);
-
-       if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
-               /* Don't use SMEM for shader buffer loads, because LLVM doesn't
-                * select SMEM for SI.load.const with a non-constant offset, and
-                * constant offsets practically don't exist with shader buffers.
-                *
-                * Also, SI.load.const doesn't use inst_offset when it's lowered
-                * to VMEM, so we just end up with more VALU instructions in the end
-                * and no benefit.
-                *
-                * TODO: Remove this line once LLVM can select SMEM with a non-constant
-                *       offset, and can derive inst_offset when VMEM is selected.
-                *       After that, si_memory_barrier should invalidate sL1 for shader
-                *       buffers.
-                */
-               emit_data->output[emit_data->chan] =
-                       ac_build_buffer_load(&ctx->ac, args.resource,
-                                            util_last_bit(inst->Dst[0].Register.WriteMask),
-                                            NULL, voffset, NULL, 0,
-                                            args.cache_policy, can_speculate, false);
-               return;
-       }
-
-       if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
-               unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask);
-               LLVMValueRef result =
-                       ac_build_buffer_load_format(&ctx->ac,
-                                                   args.resource,
-                                                   vindex,
-                                                   ctx->i32_0,
-                                                   num_channels,
-                                                   args.cache_policy,
-                                                   can_speculate);
-               emit_data->output[emit_data->chan] =
-                       ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
-       } else {
-               args.opcode = ac_image_load;
-               args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
-               args.attributes = ac_get_load_intr_attribs(can_speculate);
-               args.dmask = 0xf;
-
-               emit_data->output[emit_data->chan] =
-                       ac_build_image_opcode(&ctx->ac, &args);
-       }
-}
-/**
- * Store the components of a vector selected by a writemask to a buffer.
- *
- * The writemask is consumed in runs of consecutive set bits
- * (u_bit_scan_consecutive_range); each run is emitted as one
- * ac_build_buffer_store_dword() call at voffset + start * 4.
- *
- * writeonly_memory is accepted but never referenced in the body.
- */
-static void store_emit_buffer(struct si_shader_context *ctx,
-                             LLVMValueRef resource,
-                             unsigned writemask,
-                             LLVMValueRef value,
-                             LLVMValueRef voffset,
-                             unsigned cache_policy,
-                             bool writeonly_memory)
-{
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef base_data = value;
-       LLVMValueRef base_offset = voffset;
-
-       while (writemask) {
-               int start, count;
-               LLVMValueRef data, voff;
-
-               u_bit_scan_consecutive_range(&writemask, &start, &count);
-
-               if (count == 3 && ac_has_vec3_support(ctx->ac.chip_class, false)) {
-                       LLVMValueRef values[3] = {
-                               LLVMBuildExtractElement(builder, base_data,
-                                                       LLVMConstInt(ctx->i32, start, 0), ""),
-                               LLVMBuildExtractElement(builder, base_data,
-                                                       LLVMConstInt(ctx->i32, start + 1, 0), ""),
-                               LLVMBuildExtractElement(builder, base_data,
-                                                       LLVMConstInt(ctx->i32, start + 2, 0), ""),
-                       };
-                       data = ac_build_gather_values(&ctx->ac, values, 3);
-               } else if (count >= 3) { /* 4 components, or 3 without vec3 support */
-                       data = base_data; /* NOTE(review): whole vector is passed even if start != 0; confirm the callee copes */
-               } else if (count == 2) {
-                       LLVMValueRef values[2] = {
-                               LLVMBuildExtractElement(builder, base_data,
-                                                       LLVMConstInt(ctx->i32, start, 0), ""),
-                               LLVMBuildExtractElement(builder, base_data,
-                                                       LLVMConstInt(ctx->i32, start + 1, 0), ""),
-                       };
-
-                       data = ac_build_gather_values(&ctx->ac, values, 2);
-               } else {
-                       assert(count == 1);
-                       data = LLVMBuildExtractElement(
-                               builder, base_data,
-                               LLVMConstInt(ctx->i32, start, 0), "");
-               }
-
-               voff = base_offset;
-               if (start != 0) { /* advance the offset to the first component of this run */
-                       voff = LLVMBuildAdd(
-                               builder, voff,
-                               LLVMConstInt(ctx->i32, start * 4, 0), "");
-               }
-
-               ac_build_buffer_store_dword(&ctx->ac, resource, data, count,
-                                           voff, ctx->i32_0, 0, cache_policy);
-       }
-}
-/**
- * Emit a store whose destination is a TGSI_FILE_MEMORY register
- * (the only caller visible here is store_emit, which checks that file).
- *
- * The base pointer comes from get_memory_ptr(ctx, inst, ctx->f32, 0).
- * For every channel enabled in the Dst[0] writemask, the matching
- * channel of source operand 1 is fetched and stored at element index
- * "chan" relative to that pointer.
- */
-static void store_emit_memory(
-               struct si_shader_context *ctx,
-               struct lp_build_emit_data *emit_data)
-{
-       const struct tgsi_full_instruction *inst = emit_data->inst;
-       LLVMBuilderRef builder = ctx->ac.builder;
-       unsigned writemask = inst->Dst[0].Register.WriteMask;
-       LLVMValueRef ptr, derived_ptr, data, index;
-       int chan;
-
-       ptr = get_memory_ptr(ctx, inst, ctx->f32, 0);
-
-       for (chan = 0; chan < 4; ++chan) {
-               if (!(writemask & (1 << chan))) { /* channel not written */
-                       continue;
-               }
-               data = lp_build_emit_fetch(&ctx->bld_base, inst, 1, chan);
-               index = LLVMConstInt(ctx->i32, chan, 0);
-               derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); /* ptr + chan elements */
-               LLVMBuildStore(builder, data, derived_ptr);
-       }
-}
-/**
- * Emit a store instruction, dispatching on the destination:
- *  - TGSI_FILE_MEMORY: handled entirely by store_emit_memory();
- *  - TGSI_FILE_BUFFER: store_emit_buffer() with the offset fetched from
- *    source operand 0;
- *  - image with a TGSI_TEXTURE_BUFFER target:
- *    ac_build_buffer_store_format() indexed by the first coordinate;
- *  - any other image: an ac_image_store opcode with dmask 0xf.
- *
- * The data to store is always read from source operand 1. For
- * TGSI_MEMORY_VOLATILE accesses a wait on outstanding vector loads and
- * stores is emitted before the store itself.
- */
-static void store_emit(
-               const struct lp_build_tgsi_action *action,
-               struct lp_build_tgsi_context *bld_base,
-               struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       const struct tgsi_full_instruction * inst = emit_data->inst;
-       const struct tgsi_shader_info *info = &ctx->shader->selector->info;
-       struct tgsi_full_src_register resource_reg =
-               tgsi_full_src_register_from_dst(&inst->Dst[0]);
-       unsigned target = inst->Memory.Texture;
-
-       if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
-               store_emit_memory(ctx, emit_data);
-               return;
-       }
-
-       bool writeonly_memory = is_oneway_access_only(inst, info,
-                                                     info->shader_buffers_load |
-                                                     info->shader_buffers_atomic,
-                                                     info->images_load |
-                                                     info->images_atomic,
-                                                     info->uses_bindless_buffer_load |
-                                                     info->uses_bindless_buffer_atomic,
-                                                     info->uses_bindless_image_load |
-                                                     info->uses_bindless_image_atomic);
-       LLVMValueRef chans[4];
-       LLVMValueRef vindex = ctx->i32_0;
-       LLVMValueRef voffset = ctx->i32_0;
-       struct ac_image_args args = {};
-
-       for (unsigned chan = 0; chan < 4; ++chan) /* all four channels are fetched, whatever the writemask */
-               chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
-
-       if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
-               args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, false);
-               voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 0, 0));
-       } else {
-               image_fetch_rsrc(bld_base, &resource_reg, false, true, target, &args.resource);
-               image_fetch_coords(bld_base, inst, 0, args.resource, args.coords);
-               vindex = args.coords[0]; /* for buffers only */
-       }
-
-       if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
-               ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
-
-       bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER;
-       args.cache_policy = get_cache_policy(ctx, inst,
-                                            false, /* atomic */
-                                            is_image, /* may_store_unaligned */
-                                            writeonly_memory);
-
-       if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
-               store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask,
-                                 ac_build_gather_values(&ctx->ac, chans, 4),
-                                 voffset, args.cache_policy, writeonly_memory);
-               return;
-       }
-
-       if (target == TGSI_TEXTURE_BUFFER) {
-               unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask); /* highest written channel + 1 */
-
-               ac_build_buffer_store_format(&ctx->ac, args.resource,
-                                            ac_build_gather_values(&ctx->ac, chans, num_channels),
-                                            vindex, ctx->i32_0 /* voffset */,
-                                            num_channels,
-                                            args.cache_policy);
-       } else {
-               args.opcode = ac_image_store;
-               args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4);
-               args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
-               args.attributes = AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY;
-               args.dmask = 0xf;
-
-               emit_data->output[emit_data->chan] =
-                       ac_build_image_opcode(&ctx->ac, &args);
-       }
-}
-/**
- * Emit an atomic operation on a pointer obtained from
- * get_memory_ptr(ctx, inst, ctx->i32, 1).
- *
- * TGSI_OPCODE_ATOMCAS becomes a compare-and-exchange whose element 0 is
- * taken as the result; the other handled opcodes are mapped to an
- * LLVMAtomicRMWBinOp and emitted through ac_build_atomic_rmw(). Opcodes
- * not listed in the switch hit unreachable(). The integer result is
- * bitcast to f32 before being written to the instruction output.
- */
-static void atomic_emit_memory(struct si_shader_context *ctx,
-                               struct lp_build_emit_data *emit_data) {
-       LLVMBuilderRef builder = ctx->ac.builder;
-       const struct tgsi_full_instruction * inst = emit_data->inst;
-       LLVMValueRef ptr, result, arg;
-       const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup"; /* scope name depends on the LLVM version */
-
-       ptr = get_memory_ptr(ctx, inst, ctx->i32, 1);
-
-       arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0);
-       arg = ac_to_integer(&ctx->ac, arg);
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
-               LLVMValueRef new_data;
-               new_data = lp_build_emit_fetch(&ctx->bld_base,
-                                              inst, 3, 0);
-
-               new_data = ac_to_integer(&ctx->ac, new_data);
-
-               result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, arg, new_data,
-                                                 sync_scope);
-               result = LLVMBuildExtractValue(builder, result, 0, ""); /* keep member 0 of the returned aggregate */
-       } else {
-               LLVMAtomicRMWBinOp op;
-
-               switch(inst->Instruction.Opcode) {
-                       case TGSI_OPCODE_ATOMUADD:
-                               op = LLVMAtomicRMWBinOpAdd;
-                               break;
-                       case TGSI_OPCODE_ATOMXCHG:
-                               op = LLVMAtomicRMWBinOpXchg;
-                               break;
-                       case TGSI_OPCODE_ATOMAND:
-                               op = LLVMAtomicRMWBinOpAnd;
-                               break;
-                       case TGSI_OPCODE_ATOMOR:
-                               op = LLVMAtomicRMWBinOpOr;
-                               break;
-                       case TGSI_OPCODE_ATOMXOR:
-                               op = LLVMAtomicRMWBinOpXor;
-                               break;
-                       case TGSI_OPCODE_ATOMUMIN:
-                               op = LLVMAtomicRMWBinOpUMin;
-                               break;
-                       case TGSI_OPCODE_ATOMUMAX:
-                               op = LLVMAtomicRMWBinOpUMax;
-                               break;
-                       case TGSI_OPCODE_ATOMIMIN:
-                               op = LLVMAtomicRMWBinOpMin;
-                               break;
-                       case TGSI_OPCODE_ATOMIMAX:
-                               op = LLVMAtomicRMWBinOpMax;
-                               break;
-                       default:
-                               unreachable("unknown atomic opcode");
-               }
-
-               result = ac_build_atomic_rmw(&ctx->ac, op, ptr, arg, sync_scope);
-       }
-       emit_data->output[emit_data->chan] =
-               LLVMBuildBitCast(builder, result, ctx->f32, "");
-}
-/**
- * Emit an atomic instruction, dispatching on source operand 0:
- *  - TGSI_FILE_MEMORY: handled entirely by atomic_emit_memory();
- *  - image with a TGSI_TEXTURE_BUFFER target: a
- *    "llvm.amdgcn.struct.buffer.atomic.<intr_name>" intrinsic call;
- *  - TGSI_FILE_BUFFER: a "llvm.amdgcn.buffer.atomic.<intr_name>"
- *    intrinsic call;
- *  - any other image: ac_image_atomic / ac_image_atomic_cmpswap.
- *
- * The intrinsic name suffix is taken from action->intr_name. Data comes
- * from source operand 2 (and operand 3 for ATOMCAS); the result is
- * converted with ac_to_float() before being written to the output.
- */
-static void atomic_emit(
-               const struct lp_build_tgsi_action *action,
-               struct lp_build_tgsi_context *bld_base,
-               struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       const struct tgsi_full_instruction * inst = emit_data->inst;
-       struct ac_image_args args = {};
-       unsigned num_data = 0;
-       LLVMValueRef vindex = ctx->i32_0;
-       LLVMValueRef voffset = ctx->i32_0;
-
-       if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
-               atomic_emit_memory(ctx, emit_data);
-               return;
-       }
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
-               /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
-                * of arguments, which is reversed relative to TGSI (and GLSL)
-                */
-               args.data[num_data++] =
-                       ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 3, 0));
-       }
-
-       args.data[num_data++] =
-               ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0));
-
-       args.cache_policy = get_cache_policy(ctx, inst, true, false, false);
-
-       if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
-               args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
-               voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0));
-       } else {
-               image_fetch_rsrc(bld_base, &inst->Src[0], false, true,
-                               inst->Memory.Texture, &args.resource);
-               image_fetch_coords(bld_base, inst, 1, args.resource, args.coords);
-               vindex = args.coords[0]; /* for buffers only */
-       }
-
-       if (inst->Src[0].Register.File != TGSI_FILE_BUFFER &&
-           inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { /* image atomic on a buffer texture */
-               LLVMValueRef buf_args[7];
-               unsigned num_args = 0;
-
-               buf_args[num_args++] = args.data[0];
-               if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
-                       buf_args[num_args++] = args.data[1];
-
-               buf_args[num_args++] = args.resource;
-               buf_args[num_args++] = vindex;
-               buf_args[num_args++] = voffset;
-               buf_args[num_args++] = ctx->i32_0; /* soffset */
-               buf_args[num_args++] = LLVMConstInt(ctx->i32, args.cache_policy & ac_slc, 0);
-
-               char intrinsic_name[64];
-               snprintf(intrinsic_name, sizeof(intrinsic_name),
-                        "llvm.amdgcn.struct.buffer.atomic.%s", action->intr_name);
-               emit_data->output[emit_data->chan] =
-                       ac_to_float(&ctx->ac,
-                                   ac_build_intrinsic(&ctx->ac, intrinsic_name,
-                                                      ctx->i32, buf_args, num_args, 0));
-               return;
-       }
-
-       if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { /* shader buffer atomic; vindex is still i32_0 here */
-               LLVMValueRef buf_args[7];
-               unsigned num_args = 0;
-
-               buf_args[num_args++] = args.data[0];
-               if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
-                       buf_args[num_args++] = args.data[1];
-
-               buf_args[num_args++] = args.resource;
-               buf_args[num_args++] = vindex;
-               buf_args[num_args++] = voffset;
-               buf_args[num_args++] = args.cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
-
-               char intrinsic_name[40];
-               snprintf(intrinsic_name, sizeof(intrinsic_name),
-                        "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
-               emit_data->output[emit_data->chan] =
-                       ac_to_float(&ctx->ac,
-                                   ac_build_intrinsic(&ctx->ac, intrinsic_name,
-                                                      ctx->i32, buf_args, num_args, 0));
-       } else {
-               if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
-                       args.opcode = ac_image_atomic_cmpswap;
-               } else {
-                       args.opcode = ac_image_atomic;
-                       switch (inst->Instruction.Opcode) {
-                       case TGSI_OPCODE_ATOMXCHG: args.atomic = ac_atomic_swap; break;
-                       case TGSI_OPCODE_ATOMUADD: args.atomic = ac_atomic_add; break;
-                       case TGSI_OPCODE_ATOMAND: args.atomic = ac_atomic_and; break;
-                       case TGSI_OPCODE_ATOMOR: args.atomic = ac_atomic_or; break;
-                       case TGSI_OPCODE_ATOMXOR: args.atomic = ac_atomic_xor; break;
-                       case TGSI_OPCODE_ATOMUMIN: args.atomic = ac_atomic_umin; break;
-                       case TGSI_OPCODE_ATOMUMAX: args.atomic = ac_atomic_umax; break;
-                       case TGSI_OPCODE_ATOMIMIN: args.atomic = ac_atomic_smin; break;
-                       case TGSI_OPCODE_ATOMIMAX: args.atomic = ac_atomic_smax; break;
-                       case TGSI_OPCODE_ATOMINC_WRAP:
-                               args.atomic = ac_atomic_inc_wrap;
-                               break;
-                       case TGSI_OPCODE_ATOMDEC_WRAP:
-                               args.atomic = ac_atomic_dec_wrap;
-                               break;
-                       default: unreachable("unhandled image atomic");
-                       }
-               }
-
-               args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
-               emit_data->output[emit_data->chan] =
-                       ac_to_float(&ctx->ac, ac_build_image_opcode(&ctx->ac, &args));
-       }
-}
-/**
- * Post-process the vector returned by a resinfo query for a TGSI target.
- *
- * On GFX9, for 1D array targets, element 2 is copied into element 1.
- * For cube array targets, element 2 is divided by 6. Any other target
- * is returned unchanged.
- *
- * Returns the (possibly updated) vector.
- */
-static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
-                               unsigned target, LLVMValueRef out)
-{
-       LLVMBuilderRef builder = ctx->ac.builder;
-
-       /* 1D textures are allocated and used as 2D on GFX9. */
-        if (ctx->screen->info.chip_class == GFX9 &&
-           (target == TGSI_TEXTURE_1D_ARRAY ||
-            target == TGSI_TEXTURE_SHADOW1D_ARRAY)) {
-               LLVMValueRef layers =
-                       LLVMBuildExtractElement(builder, out,
-                                               LLVMConstInt(ctx->i32, 2, 0), "");
-               out = LLVMBuildInsertElement(builder, out, layers,
-                                            ctx->i32_1, ""); /* element 1 = element 2 */
-       }
-
-       /* Divide the number of layers by 6 to get the number of cubes. */
-       if (target == TGSI_TEXTURE_CUBE_ARRAY ||
-           target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
-               LLVMValueRef imm2 = LLVMConstInt(ctx->i32, 2, 0);
-
-               LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
-               z = LLVMBuildSDiv(builder, z, LLVMConstInt(ctx->i32, 6, 0), ""); /* signed division */
-
-               out = LLVMBuildInsertElement(builder, out, z, imm2, "");
-       }
-       return out;
-}
-/**
- * Emit a resource size query for TGSI_OPCODE_TXQ or another opcode
- * (compared against TGSI_OPCODE_RESQ below).
- *
- * The queried register is Src[1] for TXQ and Src[0] otherwise.
- *  - TGSI_FILE_BUFFER: the result is element 2 of the buffer descriptor.
- *  - TXQ or RESQ on a TGSI_TEXTURE_BUFFER target: the result comes from
- *    get_buffer_size() on the fetched descriptor.
- *  - Everything else: an ac_image_get_resinfo opcode whose result is
- *    passed through fix_resinfo(). For RESQ on MSAA targets the sample
- *    count is additionally inserted into element 3.
- */
-static void resq_emit(
-               const struct lp_build_tgsi_action *action,
-               struct lp_build_tgsi_context *bld_base,
-               struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       const struct tgsi_full_instruction *inst = emit_data->inst;
-       const struct tgsi_full_src_register *reg =
-               &inst->Src[inst->Instruction.Opcode == TGSI_OPCODE_TXQ ? 1 : 0];
-
-       if (reg->Register.File == TGSI_FILE_BUFFER) {
-               LLVMValueRef rsrc = shader_buffer_fetch_rsrc(ctx, reg, false);
-
-               emit_data->output[emit_data->chan] =
-                       LLVMBuildExtractElement(builder, rsrc,
-                                               LLVMConstInt(ctx->i32, 2, 0), "");
-               return;
-       }
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
-           inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
-               LLVMValueRef rsrc;
-
-               tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL);
-               /* Read the size from the buffer descriptor directly. */
-               emit_data->output[emit_data->chan] =
-                       get_buffer_size(bld_base, rsrc);
-               return;
-       }
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ &&
-           inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
-               LLVMValueRef rsrc;
-
-               image_fetch_rsrc(bld_base, reg, false, false, inst->Memory.Texture, &rsrc);
-               emit_data->output[emit_data->chan] =
-                       get_buffer_size(bld_base, rsrc);
-               return;
-       }
-
-       unsigned target;
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
-               target = inst->Texture.Texture;
-       } else {
-               if (inst->Memory.Texture == TGSI_TEXTURE_3D) /* non-TXQ queries treat 3D as a 2D array */
-                       target = TGSI_TEXTURE_2D_ARRAY;
-               else
-                       target = inst->Memory.Texture;
-       }
-
-       struct ac_image_args args = {};
-       args.opcode = ac_image_get_resinfo;
-       args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
-       args.dmask = 0xf;
-       args.attributes = AC_FUNC_ATTR_READNONE;
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
-               tex_fetch_ptrs(bld_base, emit_data, &args.resource, NULL, NULL);
-               args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); /* LOD from Src[0].x */
-       } else {
-               image_fetch_rsrc(bld_base, reg, false, false, target, &args.resource);
-               args.lod = ctx->i32_0;
-       }
-
-       emit_data->output[emit_data->chan] =
-               fix_resinfo(ctx, target, ac_build_image_opcode(&ctx->ac, &args));
-
-       if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ &&
-           (target == TGSI_TEXTURE_2D_MSAA ||
-            target == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
-               LLVMValueRef samples =
-                       ac_build_image_get_sample_count(&ctx->ac, args.resource);
-
-               emit_data->output[emit_data->chan] =
-                       LLVMBuildInsertElement(ctx->ac.builder,
-                                              emit_data->output[emit_data->chan],
-                                              samples,
-                                              LLVMConstInt(ctx->i32, 3, 0), ""); /* sample count goes into element 3 */
-       }
-}
-
-/**
- * Load an image view, FMASK view, or sampler state descriptor.
- *
- * "index" is rescaled according to the descriptor type before the load:
- * image = index * 2, FMASK = index * 2 + 1, buffer = index * 4 + 1,
- * sampler = index * 4 + 3. For the buffer and sampler cases "list" is
- * first cast to a pointer to an array of v4i32. The plane descriptor
- * types are not supported here and hit unreachable().
- *
- * Returns the result of ac_build_load_to_sgpr() on the adjusted list
- * and index.
- */
-LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
-                                 LLVMValueRef list, LLVMValueRef index,
-                                 enum ac_descriptor_type type)
-{
-       LLVMBuilderRef builder = ctx->ac.builder;
-
-       switch (type) {
-       case AC_DESC_IMAGE:
-               /* The image is at [0:7]. */
-               index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
-               break;
-       case AC_DESC_BUFFER:
-               /* The buffer is in [4:7]. */
-               index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
-                                     ctx->i32_1);
-               list = LLVMBuildPointerCast(builder, list,
-                                           ac_array_in_const32_addr_space(ctx->v4i32), "");
-               break;
-       case AC_DESC_FMASK:
-               /* The FMASK is at [8:15]. */
-               index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
-                                     ctx->i32_1);
-               break;
-       case AC_DESC_SAMPLER:
-               /* The sampler state is at [12:15]. */
-               index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
-                                     LLVMConstInt(ctx->i32, 3, 0));
-               list = LLVMBuildPointerCast(builder, list,
-                                           ac_array_in_const32_addr_space(ctx->v4i32), "");
-               break;
-       case AC_DESC_PLANE_0:
-       case AC_DESC_PLANE_1:
-       case AC_DESC_PLANE_2:
-               /* Only used for the multiplane image support for Vulkan. Should
-                * never be reached in radeonsi.
-                */
-               unreachable("Plane descriptor requested in radeonsi.");
-       }
-
-       return ac_build_load_to_sgpr(&ctx->ac, list, index);
-}
-
-/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
- *
- * GFX6-GFX7:
- *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
- *   filtering manually. The driver sets img7 to a mask clearing
- *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
- *     s_and_b32 samp0, samp0, img7
- *
- * GFX8:
- *   The ANISO_OVERRIDE sampler field enables this fix in TA.
- *
- * Returns "samp" unchanged when chip_class >= GFX8; otherwise returns
- * "samp" with element 0 replaced by (element 0 of samp AND element 7
- * of res).
- */
-static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
-                                          LLVMValueRef res, LLVMValueRef samp)
-{
-       LLVMValueRef img7, samp0;
-
-       if (ctx->screen->info.chip_class >= GFX8)
-               return samp;
-
-       img7 = LLVMBuildExtractElement(ctx->ac.builder, res,
-                                      LLVMConstInt(ctx->i32, 7, 0), "");
-       samp0 = LLVMBuildExtractElement(ctx->ac.builder, samp,
-                                       ctx->i32_0, "");
-       samp0 = LLVMBuildAnd(ctx->ac.builder, samp0, img7, "");
-       return LLVMBuildInsertElement(ctx->ac.builder, samp, samp0,
-                                     ctx->i32_0, "");
-}
-/**
- * Load the descriptors needed by a texture instruction.
- *
- * The sampler register is the last source operand of the instruction.
- * If its file is not TGSI_FILE_SAMPLER, the bindless descriptor list is
- * used and the fetched handle (times 2) is added to the list pointer.
- *
- * \p res_ptr is always written, so it must not be NULL: it receives a
- * buffer descriptor for TGSI_TEXTURE_BUFFER and an image descriptor
- * otherwise. \p samp_ptr and \p fmask_ptr are optional; when non-NULL
- * they are first set to NULL, then \p fmask_ptr is filled for MSAA
- * targets and \p samp_ptr for all remaining non-buffer targets.
- */
-static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
-                          struct lp_build_emit_data *emit_data,
-                          LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
-                          LLVMValueRef *fmask_ptr)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
-       const struct tgsi_full_instruction *inst = emit_data->inst;
-       const struct tgsi_full_src_register *reg;
-       unsigned target = inst->Texture.Texture;
-       unsigned sampler_src;
-       LLVMValueRef index;
-
-       sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1; /* last source operand */
-       reg = &emit_data->inst->Src[sampler_src];
-
-       if (reg->Register.Indirect) {
-               index = si_get_bounded_indirect_index(ctx,
-                                                     &reg->Indirect,
-                                                     reg->Register.Index,
-                                                     ctx->num_samplers);
-               index = LLVMBuildAdd(ctx->ac.builder, index,
-                                    LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
-       } else {
-               index = LLVMConstInt(ctx->i32,
-                                    si_get_sampler_slot(reg->Register.Index), 0);
-       }
-
-       if (reg->Register.File != TGSI_FILE_SAMPLER) {
-               /* Bindless descriptors are accessible from a different pair of
-                * user SGPR indices.
-                */
-               list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images);
-               index = lp_build_emit_fetch_src(bld_base, reg,
-                                               TGSI_TYPE_UNSIGNED, 0);
-
-               /* Since bindless handle arithmetic can contain an unsigned integer
-                * wraparound and si_load_sampler_desc assumes there isn't any,
-                * use GEP without "inbounds" (inside ac_build_pointer_add)
-                * to prevent incorrect code generation and hangs.
-                */
-               index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
-               list = ac_build_pointer_add(&ctx->ac, list, index);
-               index = ctx->i32_0; /* the offset is now folded into "list" */
-       }
-
-       if (target == TGSI_TEXTURE_BUFFER)
-               *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_BUFFER);
-       else
-               *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_IMAGE);
-
-       if (samp_ptr)
-               *samp_ptr = NULL;
-       if (fmask_ptr)
-               *fmask_ptr = NULL;
-
-       if (target == TGSI_TEXTURE_2D_MSAA ||
-           target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
-               if (fmask_ptr)
-                       *fmask_ptr = si_load_sampler_desc(ctx, list, index,
-                                                         AC_DESC_FMASK);
-       } else if (target != TGSI_TEXTURE_BUFFER) {
-               if (samp_ptr) {
-                       *samp_ptr = si_load_sampler_desc(ctx, list, index,
-                                                        AC_DESC_SAMPLER);
-                       *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
-               }
-       }
-}
-
-/* Gather4 should follow the same rules as bilinear filtering, but the hardware
- * incorrectly forces nearest filtering if the texture format is integer.
- * The only effect it has on Gather4, which always returns 4 texels for
- * bilinear filtering, is that the final coordinates are off by 0.5 of
- * the texel size.
- *
- * The workaround is to subtract 0.5 from the unnormalized coordinates,
- * or (0.5 / size) from the normalized coordinates.
- *
- * However, cube textures with 8_8_8_8 data formats require a different
- * workaround of overriding the num format to USCALED/SSCALED. This would lose
- * precision in 32-bit data formats, so it needs to be applied dynamically at
- * runtime. In this case, return an i1 value that indicates whether the
- * descriptor was overridden (and hence a fixup of the sampler result is needed).
- */
-static LLVMValueRef
-si_lower_gather4_integer(struct si_shader_context *ctx,
-                        struct ac_image_args *args,
-                        unsigned target,
-                        enum tgsi_return_type return_type)
-{
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef wa_8888 = NULL;
-       LLVMValueRef half_texel[2];
-
-       assert(return_type == TGSI_RETURN_TYPE_SINT ||
-              return_type == TGSI_RETURN_TYPE_UINT);
-
-       if (target == TGSI_TEXTURE_CUBE ||
-           target == TGSI_TEXTURE_CUBE_ARRAY) {
-               LLVMValueRef formats;
-               LLVMValueRef data_format;
-               LLVMValueRef wa_formats;
-
-               formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, "");
-
-               data_format = LLVMBuildLShr(builder, formats,
-                                           LLVMConstInt(ctx->i32, 20, false), "");
-               data_format = LLVMBuildAnd(builder, data_format,
-                                          LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
-               wa_8888 = LLVMBuildICmp(
-                       builder, LLVMIntEQ, data_format,
-                       LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
-                       "");
-
-               uint32_t wa_num_format =
-                       return_type == TGSI_RETURN_TYPE_UINT ?
-                       S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) :
-                       S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
-               wa_formats = LLVMBuildAnd(builder, formats,
-                                         LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false),
-                                         "");
-               wa_formats = LLVMBuildOr(builder, wa_formats,
-                                       LLVMConstInt(ctx->i32, wa_num_format, false), "");
-
-               formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, "");
-               args->resource = LLVMBuildInsertElement(
-                       builder, args->resource, formats, ctx->i32_1, "");
-       }
-
-       if (target == TGSI_TEXTURE_RECT ||
-           target == TGSI_TEXTURE_SHADOWRECT) {
-               assert(!wa_8888);
-               half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
-       } else {
-               struct ac_image_args resinfo = {};
-               struct lp_build_if_state if_ctx;
-
-               if (wa_8888) {
-                       /* Skip the texture size query entirely if we don't need it. */
-                       lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, ""));
-               }
-
-               /* Query the texture size. */
-               resinfo.opcode = ac_image_get_resinfo;
-               resinfo.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
-               resinfo.resource = args->resource;
-               resinfo.sampler = args->sampler;
-               resinfo.lod = ctx->ac.i32_0;
-               resinfo.dmask = 0xf;
-               resinfo.attributes = AC_FUNC_ATTR_READNONE;
-
-               LLVMValueRef texsize =
-                       fix_resinfo(ctx, target,
-                                   ac_build_image_opcode(&ctx->ac, &resinfo));
-
-               /* Compute -0.5 / size. */
-               for (unsigned c = 0; c < 2; c++) {
-                       half_texel[c] =
-                               LLVMBuildExtractElement(builder, texsize,
-                                                       LLVMConstInt(ctx->i32, c, 0), "");
-                       half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, "");
-                       half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, half_texel[c]);
-                       half_texel[c] = LLVMBuildFMul(builder, half_texel[c],
-                                                     LLVMConstReal(ctx->f32, -0.5), "");
-               }
-
-               if (wa_8888) {
-                       lp_build_endif(&if_ctx);
-
-                       LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block };
-
-                       for (unsigned c = 0; c < 2; c++) {
-                               LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 };
-                               half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2,
-                                                            values, bb);
-                       }
-               }
-       }
-
-       for (unsigned c = 0; c < 2; c++) {
-               LLVMValueRef tmp;
-               tmp = ac_to_float(&ctx->ac, args->coords[c]);
-               tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
-               args->coords[c] = ac_to_integer(&ctx->ac, tmp);
-       }
-
-       return wa_8888;
-}
-
-/* The second half of the cube texture 8_8_8_8 integer workaround: adjust the
- * result after the gather operation.
- */
-static LLVMValueRef
-si_fix_gather4_integer_result(struct si_shader_context *ctx,
-                          LLVMValueRef result,
-                          enum tgsi_return_type return_type,
-                          LLVMValueRef wa)
-{
-       LLVMBuilderRef builder = ctx->ac.builder;
-
-       assert(return_type == TGSI_RETURN_TYPE_SINT ||
-              return_type == TGSI_RETURN_TYPE_UINT);
-
-       for (unsigned chan = 0; chan < 4; ++chan) {
-               LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false);
-               LLVMValueRef value;
-               LLVMValueRef wa_value;
-
-               value = LLVMBuildExtractElement(builder, result, chanv, "");
-
-               if (return_type == TGSI_RETURN_TYPE_UINT)
-                       wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, "");
-               else
-                       wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, "");
-               wa_value = ac_to_float(&ctx->ac, wa_value);
-               value = LLVMBuildSelect(builder, wa, wa_value, value, "");
-
-               result = LLVMBuildInsertElement(builder, result, value, chanv, "");
-       }
-
-       return result;
-}
-
-static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
-                               struct lp_build_tgsi_context *bld_base,
-                               struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       const struct tgsi_full_instruction *inst = emit_data->inst;
-       unsigned opcode = inst->Instruction.Opcode;
-       unsigned target = inst->Texture.Texture;
-       struct ac_image_args args = {};
-       int ref_pos = tgsi_util_get_shadow_ref_src_index(target);
-       unsigned chan;
-       bool has_offset = inst->Texture.NumOffsets > 0;
-       LLVMValueRef fmask_ptr = NULL;
-
-       tex_fetch_ptrs(bld_base, emit_data, &args.resource, &args.sampler, &fmask_ptr);
-
-       if (target == TGSI_TEXTURE_BUFFER) {
-               LLVMValueRef vindex = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
-               unsigned num_channels =
-                       util_last_bit(inst->Dst[0].Register.WriteMask);
-               LLVMValueRef result =
-                       ac_build_buffer_load_format(&ctx->ac,
-                                                   args.resource,
-                                                   vindex,
-                                                   ctx->i32_0,
-                                                   num_channels, 0, true);
-               emit_data->output[emit_data->chan] =
-                       ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
-               return;
-       }
-
-       /* Fetch and project texture coordinates */
-       args.coords[3] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_W);
-       for (chan = 0; chan < 3; chan++) {
-               args.coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
-               if (opcode == TGSI_OPCODE_TXP)
-                       args.coords[chan] = ac_build_fdiv(&ctx->ac,
-                               args.coords[chan], args.coords[3]);
-       }
-
-       if (opcode == TGSI_OPCODE_TXP)
-               args.coords[3] = ctx->ac.f32_1;
-
-       /* Pack offsets. */
-       if (has_offset &&
-           opcode != TGSI_OPCODE_TXF &&
-           opcode != TGSI_OPCODE_TXF_LZ) {
-               /* The offsets are six-bit signed integers packed like this:
-                *   X=[5:0], Y=[13:8], and Z=[21:16].
-                */
-               LLVMValueRef offset[3], pack;
-
-               assert(inst->Texture.NumOffsets == 1);
-
-               for (chan = 0; chan < 3; chan++) {
-                       offset[chan] = lp_build_emit_fetch_texoffset(bld_base, inst, 0, chan);
-                       offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
-                                                   LLVMConstInt(ctx->i32, 0x3f, 0), "");
-                       if (chan)
-                               offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
-                                                           LLVMConstInt(ctx->i32, chan*8, 0), "");
-               }
-
-               pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
-               pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
-               args.offset = pack;
-       }
-
-       /* Pack LOD bias value */
-       if (opcode == TGSI_OPCODE_TXB)
-               args.bias = args.coords[3];
-       if (opcode == TGSI_OPCODE_TXB2)
-               args.bias = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
-
-       /* Pack depth comparison value */
-       if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
-               LLVMValueRef z;
-
-               if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
-                       z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
-               } else {
-                       assert(ref_pos >= 0);
-                       z = args.coords[ref_pos];
-               }
-
-               /* Section 8.23.1 (Depth Texture Comparison Mode) of the
-                * OpenGL 4.5 spec says:
-                *
-                *    "If the texture’s internal format indicates a fixed-point
-                *     depth texture, then D_t and D_ref are clamped to the
-                *     range [0, 1]; otherwise no clamping is performed."
-                *
-                * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
-                * so the depth comparison value isn't clamped for Z16 and
-                * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
-                * an explicitly clamped 32-bit float format.
-                */
-               if (ctx->screen->info.chip_class >= GFX8 &&
-                   ctx->screen->info.chip_class <= GFX9) {
-                       LLVMValueRef upgraded;
-                       LLVMValueRef clamped;
-                       upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
-                                                          LLVMConstInt(ctx->i32, 3, false), "");
-                       upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
-                                                LLVMConstInt(ctx->i32, 29, false), "");
-                       upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, "");
-                       clamped = ac_build_clamp(&ctx->ac, z);
-                       z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, "");
-               }
-
-               args.compare = z;
-       }
-
-       /* Pack user derivatives */
-       if (opcode == TGSI_OPCODE_TXD) {
-               int param, num_src_deriv_channels, num_dst_deriv_channels;
-
-               switch (target) {
-               case TGSI_TEXTURE_3D:
-                       num_src_deriv_channels = 3;
-                       num_dst_deriv_channels = 3;
-                       break;
-               case TGSI_TEXTURE_2D:
-               case TGSI_TEXTURE_SHADOW2D:
-               case TGSI_TEXTURE_RECT:
-               case TGSI_TEXTURE_SHADOWRECT:
-               case TGSI_TEXTURE_2D_ARRAY:
-               case TGSI_TEXTURE_SHADOW2D_ARRAY:
-                       num_src_deriv_channels = 2;
-                       num_dst_deriv_channels = 2;
-                       break;
-               case TGSI_TEXTURE_CUBE:
-               case TGSI_TEXTURE_SHADOWCUBE:
-               case TGSI_TEXTURE_CUBE_ARRAY:
-               case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
-                       /* Cube derivatives will be converted to 2D. */
-                       num_src_deriv_channels = 3;
-                       num_dst_deriv_channels = 3;
-                       break;
-               case TGSI_TEXTURE_1D:
-               case TGSI_TEXTURE_SHADOW1D:
-               case TGSI_TEXTURE_1D_ARRAY:
-               case TGSI_TEXTURE_SHADOW1D_ARRAY:
-                       num_src_deriv_channels = 1;
-
-                       /* 1D textures are allocated and used as 2D on GFX9. */
-                       if (ctx->screen->info.chip_class == GFX9) {
-                               num_dst_deriv_channels = 2;
-                       } else {
-                               num_dst_deriv_channels = 1;
-                       }
-                       break;
-               default:
-                       unreachable("invalid target");
-               }
-
-               for (param = 0; param < 2; param++) {
-                       for (chan = 0; chan < num_src_deriv_channels; chan++)
-                               args.derivs[param * num_dst_deriv_channels + chan] =
-                                       lp_build_emit_fetch(bld_base, inst, param+1, chan);
-
-                       /* Fill in the rest with zeros. */
-                       for (chan = num_src_deriv_channels;
-                            chan < num_dst_deriv_channels; chan++)
-                               args.derivs[param * num_dst_deriv_channels + chan] =
-                                       ctx->ac.f32_0;
-               }
-       }
-
-       if (target == TGSI_TEXTURE_CUBE ||
-           target == TGSI_TEXTURE_CUBE_ARRAY ||
-           target == TGSI_TEXTURE_SHADOWCUBE ||
-           target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
-               ac_prepare_cube_coords(&ctx->ac,
-                                      opcode == TGSI_OPCODE_TXD,
-                                      target == TGSI_TEXTURE_CUBE_ARRAY ||
-                                      target == TGSI_TEXTURE_SHADOWCUBE_ARRAY,
-                                      opcode == TGSI_OPCODE_LODQ,
-                                      args.coords, args.derivs);
-       } else if (tgsi_is_array_sampler(target) &&
-                  opcode != TGSI_OPCODE_TXF &&
-                  opcode != TGSI_OPCODE_TXF_LZ &&
-                  ctx->screen->info.chip_class <= GFX8) {
-               unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2;
-               args.coords[array_coord] = ac_build_round(&ctx->ac, args.coords[array_coord]);
-       }
-
-       /* 1D textures are allocated and used as 2D on GFX9. */
-       if (ctx->screen->info.chip_class == GFX9) {
-               LLVMValueRef filler;
-
-               /* Use 0.5, so that we don't sample the border color. */
-               if (opcode == TGSI_OPCODE_TXF ||
-                   opcode == TGSI_OPCODE_TXF_LZ)
-                       filler = ctx->i32_0;
-               else
-                       filler = LLVMConstReal(ctx->f32, 0.5);
-
-               if (target == TGSI_TEXTURE_1D ||
-                   target == TGSI_TEXTURE_SHADOW1D) {
-                       args.coords[1] = filler;
-               } else if (target == TGSI_TEXTURE_1D_ARRAY ||
-                          target == TGSI_TEXTURE_SHADOW1D_ARRAY) {
-                       args.coords[2] = args.coords[1];
-                       args.coords[1] = filler;
-               }
-       }
-
-       /* Pack LOD or sample index */
-       if (opcode == TGSI_OPCODE_TXL)
-               args.lod = args.coords[3];
-       else if (opcode == TGSI_OPCODE_TXL2)
-               args.lod = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
-       else if (opcode == TGSI_OPCODE_TXF) {
-               if (target == TGSI_TEXTURE_2D_MSAA) {
-                       /* No LOD, but move sample index into the right place. */
-                       args.coords[2] = args.coords[3];
-               } else if (target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
-                       args.lod = args.coords[3];
-               }
-       }
-
-       if ((target == TGSI_TEXTURE_2D_MSAA ||
-            target == TGSI_TEXTURE_2D_ARRAY_MSAA) &&
-           !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
-               ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords,
-                                        target == TGSI_TEXTURE_2D_ARRAY_MSAA);
-       }
-
-       if (opcode == TGSI_OPCODE_TXF ||
-           opcode == TGSI_OPCODE_TXF_LZ) {
-               /* add tex offsets */
-               if (inst->Texture.NumOffsets) {
-                       const struct tgsi_texture_offset *off = inst->TexOffsets;
-
-                       assert(inst->Texture.NumOffsets == 1);
-
-                       switch (target) {
-                       case TGSI_TEXTURE_3D:
-                               args.coords[2] =
-                                       LLVMBuildAdd(ctx->ac.builder, args.coords[2],
-                                               ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ], "");
-                               /* fall through */
-                       case TGSI_TEXTURE_2D:
-                       case TGSI_TEXTURE_SHADOW2D:
-                       case TGSI_TEXTURE_RECT:
-                       case TGSI_TEXTURE_SHADOWRECT:
-                       case TGSI_TEXTURE_2D_ARRAY:
-                       case TGSI_TEXTURE_SHADOW2D_ARRAY:
-                               args.coords[1] =
-                                       LLVMBuildAdd(ctx->ac.builder, args.coords[1],
-                                               ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY], "");
-                               /* fall through */
-                       case TGSI_TEXTURE_1D:
-                       case TGSI_TEXTURE_SHADOW1D:
-                       case TGSI_TEXTURE_1D_ARRAY:
-                       case TGSI_TEXTURE_SHADOW1D_ARRAY:
-                               args.coords[0] =
-                                       LLVMBuildAdd(ctx->ac.builder, args.coords[0],
-                                               ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX], "");
-                               break;
-                               /* texture offsets do not apply to other texture targets */
-                       }
-               }
-       }
-
-       if (opcode == TGSI_OPCODE_TG4) {
-               unsigned gather_comp = 0;
-
-               /* DMASK was repurposed for GATHER4. 4 components are always
-                * returned and DMASK works like a swizzle - it selects
-                * the component to fetch. The only valid DMASK values are
-                * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
-                * (red,red,red,red) etc.) The ISA document doesn't mention
-                * this.
-                */
-
-               /* Get the component index from src1.x for Gather4. */
-               if (!tgsi_is_shadow_target(target)) {
-                       LLVMValueRef comp_imm;
-                       struct tgsi_src_register src1 = inst->Src[1].Register;
-
-                       assert(src1.File == TGSI_FILE_IMMEDIATE);
-
-                       comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX];
-                       gather_comp = LLVMConstIntGetZExtValue(comp_imm);
-                       gather_comp = CLAMP(gather_comp, 0, 3);
-               }
-
-               args.dmask = 1 << gather_comp;
-       } else {
-               args.dmask = 0xf;
-       }
-
-       args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
-       args.unorm = target == TGSI_TEXTURE_RECT ||
-                    target == TGSI_TEXTURE_SHADOWRECT;
-       args.opcode = ac_image_sample;
-
-       switch (opcode) {
-       case TGSI_OPCODE_TXF:
-       case TGSI_OPCODE_TXF_LZ:
-               args.opcode = opcode == TGSI_OPCODE_TXF_LZ ||
-                             target == TGSI_TEXTURE_2D_MSAA ||
-                             target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
-                                     ac_image_load : ac_image_load_mip;
-               break;
-       case TGSI_OPCODE_LODQ:
-               args.opcode = ac_image_get_lod;
-               break;
-       case TGSI_OPCODE_TEX:
-       case TGSI_OPCODE_TEX2:
-       case TGSI_OPCODE_TXP:
-               if (ctx->type != PIPE_SHADER_FRAGMENT)
-                       args.level_zero = true;
-               break;
-       case TGSI_OPCODE_TEX_LZ:
-               args.level_zero = true;
-               break;
-       case TGSI_OPCODE_TXB:
-       case TGSI_OPCODE_TXB2:
-               assert(ctx->type == PIPE_SHADER_FRAGMENT);
-               break;
-       case TGSI_OPCODE_TXL:
-       case TGSI_OPCODE_TXL2:
-               break;
-       case TGSI_OPCODE_TXD:
-               break;
-       case TGSI_OPCODE_TG4:
-               args.opcode = ac_image_gather4;
-               args.level_zero = true;
-               break;
-       default:
-               assert(0);
-               return;
-       }
-
-       /* The hardware needs special lowering for Gather4 with integer formats. */
-       LLVMValueRef gather4_int_result_workaround = NULL;
-
-       if (ctx->screen->info.chip_class <= GFX8 &&
-           opcode == TGSI_OPCODE_TG4) {
-               assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
-
-               if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
-                   inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
-                       gather4_int_result_workaround =
-                               si_lower_gather4_integer(ctx, &args, target,
-                                                        inst->Texture.ReturnType);
-               }
-       }
-
-       args.attributes = AC_FUNC_ATTR_READNONE;
-       LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args);
-
-       if (gather4_int_result_workaround) {
-               result = si_fix_gather4_integer_result(ctx, result,
-                                                      inst->Texture.ReturnType,
-                                                      gather4_int_result_workaround);
-       }
-
-       emit_data->output[emit_data->chan] = result;
-}
-
-static void si_llvm_emit_txqs(
-       const struct lp_build_tgsi_action *action,
-       struct lp_build_tgsi_context *bld_base,
-       struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef rsrc;
-
-       tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL);
-
-       rsrc = LLVMBuildBitCast(ctx->ac.builder, rsrc, ctx->v8i32, "");
-       emit_data->output[emit_data->chan] =
-               ac_build_image_get_sample_count(&ctx->ac, rsrc);
-}
-
-static LLVMValueRef si_llvm_emit_fbfetch(struct si_shader_context *ctx)
-{
-       struct ac_image_args args = {};
-       LLVMValueRef ptr, image, fmask;
-
-       /* Ignore src0, because KHR_blend_func_extended disallows multiple render
-        * targets.
-        */
-
-       /* Load the image descriptor. */
-       STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
-       ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
-       ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
-                                  ac_array_in_const32_addr_space(ctx->v8i32), "");
-       image = ac_build_load_to_sgpr(&ctx->ac, ptr,
-                       LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
-
-       unsigned chan = 0;
-
-       args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
-
-       if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
-               args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
-
-       /* Get the current render target layer index. */
-       if (ctx->shader->key.mono.u.ps.fbfetch_layered)
-               args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
-
-       if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
-               args.coords[chan++] = si_get_sample_id(ctx);
-
-       if (ctx->shader->key.mono.u.ps.fbfetch_msaa &&
-           !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
-               fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
-                       LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
-
-               ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
-                                        ctx->shader->key.mono.u.ps.fbfetch_layered);
-       }
-
-       args.opcode = ac_image_load;
-       args.resource = image;
-       args.dmask = 0xf;
-       args.attributes = AC_FUNC_ATTR_READNONE;
-
-       if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
-               args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
-                       ac_image_2darraymsaa : ac_image_2dmsaa;
-       else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
-               args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
-                       ac_image_1darray : ac_image_1d;
-       else
-               args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
-                       ac_image_2darray : ac_image_2d;
-
-       return ac_build_image_opcode(&ctx->ac, &args);
-}
-
-static void si_tgsi_emit_fbfetch(const struct lp_build_tgsi_action *action,
-                                struct lp_build_tgsi_context *bld_base,
-                                struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       emit_data->output[emit_data->chan] = si_llvm_emit_fbfetch(ctx);
-}
-
-LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
-{
-       struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-
-       return si_llvm_emit_fbfetch(ctx);
-}
-
-/**
- * Setup actions for TGSI memory opcode, including texture opcodes.
- */
-void si_shader_context_init_mem(struct si_shader_context *ctx)
-{
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
-       bld_base->op_actions[TGSI_OPCODE_TEX].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TEX_LZ].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TEX2].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TXB].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TXB2].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TXD].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TXF].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TXF_LZ].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TXL].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TXL2].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TXP].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TXQ].emit = resq_emit;
-       bld_base->op_actions[TGSI_OPCODE_TG4].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_LODQ].emit = build_tex_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
-
-       bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_tgsi_emit_fbfetch;
-
-       bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
-       bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
-       bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
-
-       bld_base->op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add";
-       bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap";
-       bld_base->op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap";
-       bld_base->op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and";
-       bld_base->op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or";
-       bld_base->op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor";
-       bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin";
-       bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax";
-       bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin";
-       bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax";
-       bld_base->op_actions[TGSI_OPCODE_ATOMINC_WRAP].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMINC_WRAP].intr_name = "inc";
-       bld_base->op_actions[TGSI_OPCODE_ATOMDEC_WRAP].emit = atomic_emit;
-       bld_base->op_actions[TGSI_OPCODE_ATOMDEC_WRAP].intr_name = "dec";
-}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
deleted file mode 100644 (file)
index 1443432..0000000
+++ /dev/null
@@ -1,1165 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "si_shader_internal.h"
-#include "si_pipe.h"
-#include "ac_llvm_util.h"
-#include "util/u_memory.h"
-
-struct si_llvm_diagnostics {
-       struct pipe_debug_callback *debug;
-       unsigned retval;
-};
-
-static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
-{
-       struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
-       LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
-       const char *severity_str = NULL;
-
-       switch (severity) {
-       case LLVMDSError:
-               severity_str = "error";
-               break;
-       case LLVMDSWarning:
-               severity_str = "warning";
-               break;
-       case LLVMDSRemark:
-       case LLVMDSNote:
-       default:
-               return;
-       }
-
-       char *description = LLVMGetDiagInfoDescription(di);
-
-       pipe_debug_message(diag->debug, SHADER_INFO,
-                          "LLVM diagnostic (%s): %s", severity_str, description);
-
-       if (severity == LLVMDSError) {
-               diag->retval = 1;
-               fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
-       }
-
-       LLVMDisposeMessage(description);
-}
-
-/**
- * Compile an LLVM module to machine code.
- *
- * @returns 0 for success, 1 for failure
- */
-unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
-                        struct ac_llvm_compiler *compiler,
-                        struct pipe_debug_callback *debug,
-                        bool less_optimized, unsigned wave_size)
-{
-       struct ac_compiler_passes *passes = compiler->passes;
-
-       if (wave_size == 32)
-               passes = compiler->passes_wave32;
-       else if (less_optimized && compiler->low_opt_passes)
-               passes = compiler->low_opt_passes;
-
-       struct si_llvm_diagnostics diag;
-       LLVMContextRef llvm_ctx;
-
-       diag.debug = debug;
-       diag.retval = 0;
-
-       /* Setup Diagnostic Handler*/
-       llvm_ctx = LLVMGetModuleContext(M);
-
-       LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
-
-       /* Compile IR. */
-       if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer,
-                                     &binary->elf_size))
-               diag.retval = 1;
-
-       if (diag.retval != 0)
-               pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
-       return diag.retval;
-}
-
-void si_shader_binary_clean(struct si_shader_binary *binary)
-{
-       free((void *)binary->elf_buffer);
-       binary->elf_buffer = NULL;
-
-       free(binary->llvm_ir_string);
-       binary->llvm_ir_string = NULL;
-}
-
-LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
-                         enum tgsi_opcode_type type)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       switch (type) {
-       case TGSI_TYPE_UNSIGNED:
-       case TGSI_TYPE_SIGNED:
-               return ctx->ac.i32;
-       case TGSI_TYPE_UNSIGNED64:
-       case TGSI_TYPE_SIGNED64:
-               return ctx->ac.i64;
-       case TGSI_TYPE_DOUBLE:
-               return ctx->ac.f64;
-       case TGSI_TYPE_UNTYPED:
-       case TGSI_TYPE_FLOAT:
-               return ctx->ac.f32;
-       default: break;
-       }
-       return 0;
-}
-
-LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
-                    enum tgsi_opcode_type type, LLVMValueRef value)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
-
-       if (dst_type)
-               return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
-       else
-               return value;
-}
-
-/**
- * Return a value that is equal to the given i32 \p index if it lies in [0,num)
- * or an undefined value in the same interval otherwise.
- */
-LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
-                                LLVMValueRef index,
-                                unsigned num)
-{
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
-       LLVMValueRef cc;
-
-       if (util_is_power_of_two_or_zero(num)) {
-               index = LLVMBuildAnd(builder, index, c_max, "");
-       } else {
-               /* In theory, this MAX pattern should result in code that is
-                * as good as the bit-wise AND above.
-                *
-                * In practice, LLVM generates worse code (at the time of
-                * writing), because its value tracking is not strong enough.
-                */
-               cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
-               index = LLVMBuildSelect(builder, cc, index, c_max, "");
-       }
-
-       return index;
-}
-
-static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
-                                LLVMValueRef value,
-                                unsigned swizzle_x,
-                                unsigned swizzle_y,
-                                unsigned swizzle_z,
-                                unsigned swizzle_w)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef swizzles[4];
-
-       swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
-       swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
-       swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
-       swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);
-
-       return LLVMBuildShuffleVector(ctx->ac.builder,
-                                     value,
-                                     LLVMGetUndef(LLVMTypeOf(value)),
-                                     LLVMConstVector(swizzles, 4), "");
-}
-
-/**
- * Return the description of the array covering the given temporary register
- * index.
- */
-static unsigned
-get_temp_array_id(struct lp_build_tgsi_context *bld_base,
-                 unsigned reg_index,
-                 const struct tgsi_ind_register *reg)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
-       unsigned i;
-
-       if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
-               return reg->ArrayID;
-
-       for (i = 0; i < num_arrays; i++) {
-               const struct tgsi_array_info *array = &ctx->temp_arrays[i];
-
-               if (reg_index >= array->range.First && reg_index <= array->range.Last)
-                       return i + 1;
-       }
-
-       return 0;
-}
-
-static struct tgsi_declaration_range
-get_array_range(struct lp_build_tgsi_context *bld_base,
-               unsigned File, unsigned reg_index,
-               const struct tgsi_ind_register *reg)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct tgsi_declaration_range range;
-
-       if (File == TGSI_FILE_TEMPORARY) {
-               unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
-               if (array_id)
-                       return ctx->temp_arrays[array_id - 1].range;
-       }
-
-       range.First = 0;
-       range.Last = bld_base->info->file_max[File];
-       return range;
-}
-
-/**
- * For indirect registers, construct a pointer directly to the requested
- * element using getelementptr if possible.
- *
- * Returns NULL if the insertelement/extractelement fallback for array access
- * must be used.
- */
-static LLVMValueRef
-get_pointer_into_array(struct si_shader_context *ctx,
-                      unsigned file,
-                      unsigned swizzle,
-                      unsigned reg_index,
-                      const struct tgsi_ind_register *reg_indirect)
-{
-       unsigned array_id;
-       struct tgsi_array_info *array;
-       LLVMValueRef idxs[2];
-       LLVMValueRef index;
-       LLVMValueRef alloca;
-
-       if (file != TGSI_FILE_TEMPORARY)
-               return NULL;
-
-       array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
-       if (!array_id)
-               return NULL;
-
-       alloca = ctx->temp_array_allocas[array_id - 1];
-       if (!alloca)
-               return NULL;
-
-       array = &ctx->temp_arrays[array_id - 1];
-
-       if (!(array->writemask & (1 << swizzle)))
-               return ctx->undef_alloca;
-
-       index = si_get_indirect_index(ctx, reg_indirect, 1,
-                                     reg_index - ctx->temp_arrays[array_id - 1].range.First);
-
-       /* Ensure that the index is within a valid range, to guard against
-        * VM faults and overwriting critical data (e.g. spilled resource
-        * descriptors).
-        *
-        * TODO It should be possible to avoid the additional instructions
-        * if LLVM is changed so that it guarantuees:
-        * 1. the scratch space descriptor isolates the current wave (this
-        *    could even save the scratch offset SGPR at the cost of an
-        *    additional SALU instruction)
-        * 2. the memory for allocas must be allocated at the _end_ of the
-        *    scratch space (after spilled registers)
-        */
-       index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
-
-       index = ac_build_imad(&ctx->ac, index,
-                             LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
-                             LLVMConstInt(ctx->i32,
-                                          util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0));
-       idxs[0] = ctx->i32_0;
-       idxs[1] = index;
-       return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
-}
-
-LLVMValueRef
-si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
-                        LLVMTypeRef type,
-                        LLVMValueRef ptr,
-                        LLVMValueRef ptr2)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef values[2] = {
-               ac_to_integer(&ctx->ac, ptr),
-               ac_to_integer(&ctx->ac, ptr2),
-       };
-       LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
-       return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
-}
-
-static LLVMValueRef
-emit_array_fetch(struct lp_build_tgsi_context *bld_base,
-                unsigned File, enum tgsi_opcode_type type,
-                struct tgsi_declaration_range range,
-                unsigned swizzle_in)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       unsigned i, size = range.Last - range.First + 1;
-       LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
-       LLVMValueRef result = LLVMGetUndef(vec);
-       unsigned swizzle = swizzle_in;
-       struct tgsi_full_src_register tmp_reg = {};
-       tmp_reg.Register.File = File;
-       if (tgsi_type_is_64bit(type))
-               swizzle |= (swizzle_in + 1) << 16;
-
-       for (i = 0; i < size; ++i) {
-               tmp_reg.Register.Index = i + range.First;
-
-               LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
-               result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
-                       LLVMConstInt(ctx->i32, i, 0), "array_vector");
-       }
-       return result;
-}
-
-static LLVMValueRef
-load_value_from_array(struct lp_build_tgsi_context *bld_base,
-                     unsigned file,
-                     enum tgsi_opcode_type type,
-                     unsigned swizzle,
-                     unsigned reg_index,
-                     const struct tgsi_ind_register *reg_indirect)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef ptr;
-
-       ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
-       if (ptr) {
-               LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
-               if (tgsi_type_is_64bit(type)) {
-                       LLVMValueRef ptr_hi, val_hi;
-                       ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
-                       val_hi = LLVMBuildLoad(builder, ptr_hi, "");
-                       val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-                                                      val, val_hi);
-               }
-
-               return val;
-       } else {
-               struct tgsi_declaration_range range =
-                       get_array_range(bld_base, file, reg_index, reg_indirect);
-               LLVMValueRef index =
-                       si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
-               LLVMValueRef array =
-                       emit_array_fetch(bld_base, file, type, range, swizzle);
-               return LLVMBuildExtractElement(builder, array, index, "");
-       }
-}
-
-static void
-store_value_to_array(struct lp_build_tgsi_context *bld_base,
-                    LLVMValueRef value,
-                    unsigned file,
-                    unsigned chan_index,
-                    unsigned reg_index,
-                    const struct tgsi_ind_register *reg_indirect)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef ptr;
-
-       ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
-       if (ptr) {
-               LLVMBuildStore(builder, value, ptr);
-       } else {
-               unsigned i, size;
-               struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
-               LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
-               LLVMValueRef array =
-                       emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
-               LLVMValueRef temp_ptr;
-
-               array = LLVMBuildInsertElement(builder, array, value, index, "");
-
-               size = range.Last - range.First + 1;
-               for (i = 0; i < size; ++i) {
-                       switch(file) {
-                       case TGSI_FILE_OUTPUT:
-                               temp_ptr = ctx->outputs[i + range.First][chan_index];
-                               break;
-
-                       case TGSI_FILE_TEMPORARY:
-                               if (range.First + i >= ctx->temps_count)
-                                       continue;
-                               temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
-                               break;
-
-                       default:
-                               continue;
-                       }
-                       value = LLVMBuildExtractElement(builder, array,
-                               LLVMConstInt(ctx->i32, i, 0), "");
-                       LLVMBuildStore(builder, value, temp_ptr);
-               }
-       }
-}
-
-/* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
- * reload them at each use. This must be true if the shader is using
- * derivatives and KILL, because KILL can leave the WQM and then a lazy
- * input load isn't in the WQM anymore.
- */
-static bool si_preload_fs_inputs(struct si_shader_context *ctx)
-{
-       struct si_shader_selector *sel = ctx->shader->selector;
-
-       return sel->info.uses_derivatives &&
-              sel->info.uses_kill;
-}
-
-static LLVMValueRef
-get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
-              unsigned chan)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
-       return ctx->outputs[index][chan];
-}
-
-LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
-                               const struct tgsi_full_src_register *reg,
-                               enum tgsi_opcode_type type,
-                               unsigned swizzle_in)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef result = NULL, ptr, ptr2;
-       unsigned swizzle = swizzle_in & 0xffff;
-
-       if (swizzle_in == ~0) {
-               LLVMValueRef values[TGSI_NUM_CHANNELS];
-               unsigned chan;
-               for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-                       values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
-               }
-               return ac_build_gather_values(&ctx->ac, values,
-                                             TGSI_NUM_CHANNELS);
-       }
-
-       if (reg->Register.Indirect) {
-               LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
-                               swizzle, reg->Register.Index, &reg->Indirect);
-               return bitcast(bld_base, type, load);
-       }
-
-       switch(reg->Register.File) {
-       case TGSI_FILE_IMMEDIATE: {
-               LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
-               if (tgsi_type_is_64bit(type)) {
-                       result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
-                       result = LLVMConstInsertElement(result,
-                                                       ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
-                                                       ctx->i32_0);
-                       result = LLVMConstInsertElement(result,
-                                                       ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)],
-                                                       ctx->i32_1);
-                       return LLVMConstBitCast(result, ctype);
-               } else {
-                       return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
-               }
-       }
-
-       case TGSI_FILE_INPUT: {
-               unsigned index = reg->Register.Index;
-               LLVMValueRef input[4];
-
-               /* I don't think doing this for vertex shaders is beneficial.
-                * For those, we want to make sure the VMEM loads are executed
-                * only once. Fragment shaders don't care much, because
-                * v_interp instructions are much cheaper than VMEM loads.
-                */
-               if (!si_preload_fs_inputs(ctx) &&
-                   ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
-                       ctx->load_input(ctx, index, &ctx->input_decls[index], input);
-               else
-                       memcpy(input, &ctx->inputs[index * 4], sizeof(input));
-
-               result = input[swizzle];
-
-               if (tgsi_type_is_64bit(type)) {
-                       ptr = result;
-                       ptr2 = input[swizzle_in >> 16];
-                       return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-                                                       ptr, ptr2);
-               }
-               break;
-       }
-
-       case TGSI_FILE_TEMPORARY:
-               if (reg->Register.Index >= ctx->temps_count)
-                       return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
-               ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
-               if (tgsi_type_is_64bit(type)) {
-                       ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)];
-                       return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-                                                       LLVMBuildLoad(builder, ptr, ""),
-                                                       LLVMBuildLoad(builder, ptr2, ""));
-               }
-               result = LLVMBuildLoad(builder, ptr, "");
-               break;
-
-       case TGSI_FILE_OUTPUT:
-               ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
-               if (tgsi_type_is_64bit(type)) {
-                       ptr2 = get_output_ptr(bld_base, reg->Register.Index, (swizzle_in >> 16));
-                       return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-                                                       LLVMBuildLoad(builder, ptr, ""),
-                                                       LLVMBuildLoad(builder, ptr2, ""));
-               }
-               result = LLVMBuildLoad(builder, ptr, "");
-               break;
-
-       default:
-               return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
-       }
-
-       return bitcast(bld_base, type, result);
-}
-
-static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
-                                      const struct tgsi_full_src_register *reg,
-                                      enum tgsi_opcode_type type,
-                                      unsigned swizzle_in)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef cval = ctx->system_values[reg->Register.Index];
-       unsigned swizzle = swizzle_in & 0xffff;
-
-       if (tgsi_type_is_64bit(type)) {
-               LLVMValueRef lo, hi;
-
-               assert(swizzle == 0 || swizzle == 2);
-
-               lo = LLVMBuildExtractElement(
-                       builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
-               hi = LLVMBuildExtractElement(
-                       builder, cval, LLVMConstInt(ctx->i32, (swizzle_in >> 16), 0), "");
-
-               return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
-                                               lo, hi);
-       }
-
-       if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
-               cval = LLVMBuildExtractElement(
-                       builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
-       } else {
-               assert(swizzle == 0);
-       }
-
-       return bitcast(bld_base, type, cval);
-}
-
-static void emit_declaration(struct lp_build_tgsi_context *bld_base,
-                            const struct tgsi_full_declaration *decl)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       unsigned first, last, i;
-       switch(decl->Declaration.File) {
-       case TGSI_FILE_ADDRESS:
-       {
-                unsigned idx;
-               for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-                       unsigned chan;
-                       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-                                ctx->addrs[idx][chan] = ac_build_alloca_undef(
-                                       &ctx->ac, ctx->i32, "");
-                       }
-               }
-               break;
-       }
-
-       case TGSI_FILE_TEMPORARY:
-       {
-               char name[18] = "";
-               LLVMValueRef array_alloca = NULL;
-               unsigned decl_size;
-               unsigned writemask = decl->Declaration.UsageMask;
-               first = decl->Range.First;
-               last = decl->Range.Last;
-               decl_size = 4 * ((last - first) + 1);
-
-               if (decl->Declaration.Array) {
-                       unsigned id = decl->Array.ArrayID - 1;
-                       unsigned array_size;
-
-                       writemask &= ctx->temp_arrays[id].writemask;
-                       ctx->temp_arrays[id].writemask = writemask;
-                       array_size = ((last - first) + 1) * util_bitcount(writemask);
-
-                       /* If the array has more than 16 elements, store it
-                        * in memory using an alloca that spans the entire
-                        * array.
-                        *
-                        * Otherwise, store each array element individually.
-                        * We will then generate vectors (per-channel, up to
-                        * <16 x float> if the usagemask is a single bit) for
-                        * indirect addressing.
-                        *
-                        * Note that 16 is the number of vector elements that
-                        * LLVM will store in a register, so theoretically an
-                        * array with up to 4 * 16 = 64 elements could be
-                        * handled this way, but whether that's a good idea
-                        * depends on VGPR register pressure elsewhere.
-                        *
-                        * FIXME: We shouldn't need to have the non-alloca
-                        * code path for arrays. LLVM should be smart enough to
-                        * promote allocas into registers when profitable.
-                        */
-                       if (array_size > 16 ||
-                           !ctx->screen->llvm_has_working_vgpr_indexing) {
-                               array_alloca = ac_build_alloca_undef(&ctx->ac,
-                                       LLVMArrayType(ctx->f32,
-                                                     array_size), "array");
-                               ctx->temp_array_allocas[id] = array_alloca;
-                       }
-               }
-
-               if (!ctx->temps_count) {
-                       ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
-                       ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
-               }
-               if (!array_alloca) {
-                       for (i = 0; i < decl_size; ++i) {
-#ifndef NDEBUG
-                               snprintf(name, sizeof(name), "TEMP%d.%c",
-                                        first + i / 4, "xyzw"[i % 4]);
-#endif
-                               ctx->temps[first * TGSI_NUM_CHANNELS + i] =
-                                       ac_build_alloca_undef(&ctx->ac,
-                                                             ctx->f32,
-                                                             name);
-                       }
-               } else {
-                       LLVMValueRef idxs[2] = {
-                               ctx->i32_0,
-                               NULL
-                       };
-                       unsigned j = 0;
-
-                       if (writemask != TGSI_WRITEMASK_XYZW &&
-                           !ctx->undef_alloca) {
-                               /* Create a dummy alloca. We use it so that we
-                                * have a pointer that is safe to load from if
-                                * a shader ever reads from a channel that
-                                * it never writes to.
-                                */
-                               ctx->undef_alloca = ac_build_alloca_undef(
-                                       &ctx->ac, ctx->f32, "undef");
-                       }
-
-                       for (i = 0; i < decl_size; ++i) {
-                               LLVMValueRef ptr;
-                               if (writemask & (1 << (i % 4))) {
-#ifndef NDEBUG
-                                       snprintf(name, sizeof(name), "TEMP%d.%c",
-                                                first + i / 4, "xyzw"[i % 4]);
-#endif
-                                       idxs[1] = LLVMConstInt(ctx->i32, j, 0);
-                                       ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
-                                       j++;
-                               } else {
-                                       ptr = ctx->undef_alloca;
-                               }
-                               ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
-                       }
-               }
-               break;
-       }
-       case TGSI_FILE_INPUT:
-       {
-               unsigned idx;
-               for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-                       if (ctx->load_input &&
-                           ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
-                               ctx->input_decls[idx] = *decl;
-                               ctx->input_decls[idx].Range.First = idx;
-                               ctx->input_decls[idx].Range.Last = idx;
-                               ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
-
-                               if (si_preload_fs_inputs(ctx) ||
-                                   bld_base->info->processor != PIPE_SHADER_FRAGMENT)
-                                       ctx->load_input(ctx, idx, &ctx->input_decls[idx],
-                                                       &ctx->inputs[idx * 4]);
-                       }
-               }
-       }
-       break;
-
-       case TGSI_FILE_SYSTEM_VALUE:
-       {
-               unsigned idx;
-               for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-                       si_load_system_value(ctx, idx, decl);
-               }
-       }
-       break;
-
-       case TGSI_FILE_OUTPUT:
-       {
-               char name[16] = "";
-               unsigned idx;
-               for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-                       unsigned chan;
-                       assert(idx < RADEON_LLVM_MAX_OUTPUTS);
-                       if (ctx->outputs[idx][0])
-                               continue;
-                       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-#ifndef NDEBUG
-                               snprintf(name, sizeof(name), "OUT%d.%c",
-                                        idx, "xyzw"[chan % 4]);
-#endif
-                               ctx->outputs[idx][chan] = ac_build_alloca_undef(
-                                       &ctx->ac, ctx->f32, name);
-                       }
-               }
-               break;
-       }
-
-       case TGSI_FILE_MEMORY:
-               si_tgsi_declare_compute_memory(ctx, decl);
-               break;
-
-       default:
-               break;
-       }
-}
-
-void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
-                       const struct tgsi_full_instruction *inst,
-                       const struct tgsi_opcode_info *info,
-                       unsigned index,
-                       LLVMValueRef dst[4])
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       const struct tgsi_full_dst_register *reg = &inst->Dst[index];
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef temp_ptr, temp_ptr2 = NULL;
-       bool is_vec_store = false;
-       enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
-
-       if (dst[0]) {
-               LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
-               is_vec_store = (k == LLVMVectorTypeKind);
-       }
-
-       if (is_vec_store) {
-               LLVMValueRef values[4] = {};
-               uint32_t writemask = reg->Register.WriteMask;
-               while (writemask) {
-                       unsigned chan = u_bit_scan(&writemask);
-                       LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
-                       values[chan]  = LLVMBuildExtractElement(ctx->ac.builder,
-                                                       dst[0], index, "");
-               }
-               bld_base->emit_store(bld_base, inst, info, index, values);
-               return;
-       }
-
-       uint32_t writemask = reg->Register.WriteMask;
-       while (writemask) {
-               unsigned chan_index = u_bit_scan(&writemask);
-               LLVMValueRef value = dst[chan_index];
-
-               if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
-                       continue;
-               if (inst->Instruction.Saturate)
-                       value = ac_build_clamp(&ctx->ac, value);
-
-               if (reg->Register.File == TGSI_FILE_ADDRESS) {
-                       temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
-                       LLVMBuildStore(builder, value, temp_ptr);
-                       continue;
-               }
-
-               if (!tgsi_type_is_64bit(dtype))
-                       value = ac_to_float(&ctx->ac, value);
-
-               if (reg->Register.Indirect) {
-                       unsigned file = reg->Register.File;
-                       unsigned reg_index = reg->Register.Index;
-                       store_value_to_array(bld_base, value, file, chan_index,
-                                            reg_index, &reg->Indirect);
-               } else {
-                       switch(reg->Register.File) {
-                       case TGSI_FILE_OUTPUT:
-                               temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
-                               if (tgsi_type_is_64bit(dtype))
-                                       temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
-                               break;
-
-                       case TGSI_FILE_TEMPORARY:
-                       {
-                               if (reg->Register.Index >= ctx->temps_count)
-                                       continue;
-
-                               temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
-                               if (tgsi_type_is_64bit(dtype))
-                                       temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
-
-                               break;
-                       }
-                       default:
-                               return;
-                       }
-                       if (!tgsi_type_is_64bit(dtype))
-                               LLVMBuildStore(builder, value, temp_ptr);
-                       else {
-                               LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
-                                                                   LLVMVectorType(ctx->i32, 2), "");
-                               LLVMValueRef val2;
-                               value = LLVMBuildExtractElement(builder, ptr,
-                                                               ctx->i32_0, "");
-                               val2 = LLVMBuildExtractElement(builder, ptr,
-                                                              ctx->i32_1, "");
-
-                               LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
-                               LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
-                       }
-               }
-       }
-}
-
-static int get_line(int pc)
-{
-       /* Subtract 1 so that the number shown is that of the corresponding
-        * opcode in the TGSI dump, e.g. an if block has the same suffix as
-        * the instruction number of the corresponding TGSI IF.
-        */
-       return pc - 1;
-}
-
-static void bgnloop_emit(const struct lp_build_tgsi_action *action,
-                        struct lp_build_tgsi_context *bld_base,
-                        struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void brk_emit(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       ac_build_break(&ctx->ac);
-}
-
-static void cont_emit(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       ac_build_continue(&ctx->ac);
-}
-
-static void else_emit(const struct lp_build_tgsi_action *action,
-                     struct lp_build_tgsi_context *bld_base,
-                     struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       ac_build_else(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void endif_emit(const struct lp_build_tgsi_action *action,
-                      struct lp_build_tgsi_context *bld_base,
-                      struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       ac_build_endif(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void endloop_emit(const struct lp_build_tgsi_action *action,
-                        struct lp_build_tgsi_context *bld_base,
-                        struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void if_emit(const struct lp_build_tgsi_action *action,
-                   struct lp_build_tgsi_context *bld_base,
-                   struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
-}
-
-static void uif_emit(const struct lp_build_tgsi_action *action,
-                    struct lp_build_tgsi_context *bld_base,
-                    struct lp_build_emit_data *emit_data)
-{
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
-}
-
-static void emit_immediate(struct lp_build_tgsi_context *bld_base,
-                          const struct tgsi_full_immediate *imm)
-{
-       unsigned i;
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       for (i = 0; i < 4; ++i) {
-               ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
-                               LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
-       }
-
-       ctx->imms_num++;
-}
-
-void si_llvm_context_init(struct si_shader_context *ctx,
-                         struct si_screen *sscreen,
-                         struct ac_llvm_compiler *compiler,
-                         unsigned wave_size,
-                         unsigned ballot_mask_bits)
-{
-       struct lp_type type;
-
-       /* Initialize the gallivm object:
-        * We are only using the module, context, and builder fields of this struct.
-        * This should be enough for us to be able to pass our gallivm struct to the
-        * helper functions in the gallivm module.
-        */
-       memset(ctx, 0, sizeof(*ctx));
-       ctx->screen = sscreen;
-       ctx->compiler = compiler;
-
-       ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
-                            sscreen->info.family,
-                            AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
-                            wave_size, ballot_mask_bits);
-
-       ctx->gallivm.context = ctx->ac.context;
-       ctx->gallivm.module = ctx->ac.module;
-       ctx->gallivm.builder = ctx->ac.builder;
-
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
-       type.floating = true;
-       type.fixed = false;
-       type.sign = true;
-       type.norm = false;
-       type.width = 32;
-       type.length = 1;
-
-       lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
-       lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
-       lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
-       type.width *= 2;
-       lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
-       lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
-       lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
-
-       bld_base->soa = 1;
-       bld_base->emit_swizzle = emit_swizzle;
-       bld_base->emit_declaration = emit_declaration;
-       bld_base->emit_immediate = emit_immediate;
-
-       bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
-       bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
-       bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
-       bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
-       bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
-       bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
-       bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
-       bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
-
-       si_shader_context_init_alu(ctx);
-       si_shader_context_init_mem(ctx);
-
-       ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
-       ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
-       ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
-       ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
-       ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
-       ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
-       ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
-       ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
-       ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
-       ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
-       ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
-
-       ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
-       ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
-       ctx->i1false = LLVMConstInt(ctx->i1, 0, 0);
-       ctx->i1true = LLVMConstInt(ctx->i1, 1, 0);
-}
-
-/* Set the context to a certain TGSI shader. Can be called repeatedly
- * to change the shader. */
-void si_llvm_context_set_ir(struct si_shader_context *ctx,
-                           struct si_shader *shader,
-                           struct nir_shader *nir)
-{
-       struct si_shader_selector *sel = shader->selector;
-       const struct tgsi_shader_info *info = &sel->info;
-
-       ctx->shader = shader;
-       ctx->type = sel->type;
-       ctx->bld_base.info = info;
-
-       /* Clean up the old contents. */
-       FREE(ctx->temp_arrays);
-       ctx->temp_arrays = NULL;
-       FREE(ctx->temp_array_allocas);
-       ctx->temp_array_allocas = NULL;
-
-       FREE(ctx->imms);
-       ctx->imms = NULL;
-       ctx->imms_num = 0;
-
-       FREE(ctx->temps);
-       ctx->temps = NULL;
-       ctx->temps_count = 0;
-
-       ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
-       ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
-
-       ctx->num_samplers = util_last_bit(info->samplers_declared);
-       ctx->num_images = util_last_bit(info->images_declared);
-
-       if (nir)
-               return;
-
-       if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
-               int size = info->array_max[TGSI_FILE_TEMPORARY];
-
-               ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
-               ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
-
-               tgsi_scan_arrays(sel->tokens, TGSI_FILE_TEMPORARY, size,
-                                ctx->temp_arrays);
-       }
-       if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
-               int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
-               ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
-       }
-
-       /* Re-set these to start with a clean slate. */
-       ctx->bld_base.num_instructions = 0;
-       ctx->bld_base.pc = 0;
-       memset(ctx->input_decls, 0, sizeof(ctx->input_decls));
-       memset(ctx->inputs, 0, sizeof(ctx->inputs));
-       memset(ctx->outputs, 0, sizeof(ctx->outputs));
-
-       ctx->bld_base.emit_store = si_llvm_emit_store;
-       ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
-       ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
-       ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
-       ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
-       ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
-}
-
-void si_llvm_create_func(struct si_shader_context *ctx,
-                        const char *name,
-                        LLVMTypeRef *return_types, unsigned num_return_elems)
-{
-       LLVMTypeRef ret_type;
-       enum ac_llvm_calling_convention call_conv;
-       enum pipe_shader_type real_shader_type;
-
-       if (num_return_elems)
-               ret_type = LLVMStructTypeInContext(ctx->ac.context,
-                                                  return_types,
-                                                  num_return_elems, true);
-       else
-               ret_type = ctx->voidt;
-
-       real_shader_type = ctx->type;
-
-       /* LS is merged into HS (TCS), and ES is merged into GS. */
-       if (ctx->screen->info.chip_class >= GFX9) {
-               if (ctx->shader->key.as_ls)
-                       real_shader_type = PIPE_SHADER_TESS_CTRL;
-               else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
-                       real_shader_type = PIPE_SHADER_GEOMETRY;
-       }
-
-       switch (real_shader_type) {
-       case PIPE_SHADER_VERTEX:
-       case PIPE_SHADER_TESS_EVAL:
-               call_conv = AC_LLVM_AMDGPU_VS;
-               break;
-       case PIPE_SHADER_TESS_CTRL:
-               call_conv = AC_LLVM_AMDGPU_HS;
-               break;
-       case PIPE_SHADER_GEOMETRY:
-               call_conv = AC_LLVM_AMDGPU_GS;
-               break;
-       case PIPE_SHADER_FRAGMENT:
-               call_conv = AC_LLVM_AMDGPU_PS;
-               break;
-       case PIPE_SHADER_COMPUTE:
-               call_conv = AC_LLVM_AMDGPU_CS;
-               break;
-       default:
-               unreachable("Unhandle shader type");
-       }
-
-       /* Setup the function */
-       ctx->return_type = ret_type;
-       ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name,
-                                    ret_type, ctx->gallivm.module);
-}
-
-void si_llvm_optimize_module(struct si_shader_context *ctx)
-{
-       /* Dump LLVM IR before any optimization passes */
-       if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
-           si_can_dump_shader(ctx->screen, ctx->type))
-               LLVMDumpModule(ctx->gallivm.module);
-
-       /* Run the pass */
-       LLVMRunPassManager(ctx->compiler->passmgr, ctx->gallivm.module);
-       LLVMDisposeBuilder(ctx->ac.builder);
-}
-
-void si_llvm_dispose(struct si_shader_context *ctx)
-{
-       LLVMDisposeModule(ctx->gallivm.module);
-       LLVMContextDispose(ctx->gallivm.context);
-       FREE(ctx->temp_arrays);
-       ctx->temp_arrays = NULL;
-       FREE(ctx->temp_array_allocas);
-       ctx->temp_array_allocas = NULL;
-       FREE(ctx->temps);
-       ctx->temps = NULL;
-       ctx->temps_count = 0;
-       FREE(ctx->imms);
-       ctx->imms = NULL;
-       ctx->imms_num = 0;
-       ac_llvm_context_dispose(&ctx->ac);
-}
index 941a397525e2daca680523f5202f10c0a24aa1a8..bf4a22de13bf8b691ccaa00d61d43d528a10cf0a 100644 (file)
@@ -27,7 +27,6 @@
 
 #include "compiler/nir/nir_serialize.h"
 #include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_parse.h"
 #include "util/hash_table.h"
 #include "util/crc32.h"
 #include "util/u_async_debug.h"
@@ -51,11 +50,7 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
        unsigned ir_size;
        void *ir_binary;
 
-       if (sel->tokens) {
-               ir_binary = sel->tokens;
-               ir_size = tgsi_num_tokens(sel->tokens) *
-                         sizeof(struct tgsi_token);
-       } else if (sel->nir_binary) {
+       if (sel->nir_binary) {
                ir_binary = sel->nir_binary;
                ir_size = sel->nir_size;
        } else {
@@ -2153,7 +2148,7 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
                main_part->key.as_ngg = key->as_ngg;
                main_part->is_monolithic = false;
 
-               if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
+               if (si_compile_shader(sscreen, compiler_state->compiler,
                                           main_part, &compiler_state->debug) != 0) {
                        FREE(main_part);
                        return false;
@@ -2516,7 +2511,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
                     sel->type == PIPE_SHADER_GEOMETRY))
                        shader->key.as_ngg = 1;
 
-               if (sel->tokens || sel->nir) {
+               if (sel->nir) {
                        si_get_ir_cache_key(sel, shader->key.as_ngg,
                                            shader->key.as_es, ir_sha1_cache_key);
                }
@@ -2531,7 +2526,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
                        simple_mtx_unlock(&sscreen->shader_cache_mutex);
 
                        /* Compile the shader if it hasn't been loaded from the cache. */
-                       if (si_compile_tgsi_shader(sscreen, compiler, shader,
+                       if (si_compile_shader(sscreen, compiler, shader,
                                                   debug) != 0) {
                                FREE(shader);
                                fprintf(stderr, "radeonsi: can't compile a main shader part\n");
@@ -2695,44 +2690,17 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 
        sel->so = state->stream_output;
 
-       if (state->type == PIPE_SHADER_IR_TGSI &&
-           !sscreen->options.enable_nir) {
-               sel->tokens = tgsi_dup_tokens(state->tokens);
-               if (!sel->tokens) {
-                       FREE(sel);
-                       return NULL;
-               }
-
-               tgsi_scan_shader(state->tokens, &sel->info);
-               tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info);
-
-               /* Fixup for TGSI: Set which opcode uses which (i,j) pair. */
-               if (sel->info.uses_persp_opcode_interp_centroid)
-                       sel->info.uses_persp_centroid = true;
-
-               if (sel->info.uses_linear_opcode_interp_centroid)
-                       sel->info.uses_linear_centroid = true;
-
-               if (sel->info.uses_persp_opcode_interp_offset ||
-                   sel->info.uses_persp_opcode_interp_sample)
-                       sel->info.uses_persp_center = true;
-
-               if (sel->info.uses_linear_opcode_interp_offset ||
-                   sel->info.uses_linear_opcode_interp_sample)
-                       sel->info.uses_linear_center = true;
+       if (state->type == PIPE_SHADER_IR_TGSI) {
+               sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
        } else {
-               if (state->type == PIPE_SHADER_IR_TGSI) {
-                       sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
-               } else {
-                       assert(state->type == PIPE_SHADER_IR_NIR);
-                       sel->nir = state->ir.nir;
-               }
-
-               si_nir_scan_shader(sel->nir, &sel->info);
-               si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
-               si_nir_adjust_driver_locations(sel->nir);
+               assert(state->type == PIPE_SHADER_IR_NIR);
+               sel->nir = state->ir.nir;
        }
 
+       si_nir_scan_shader(sel->nir, &sel->info);
+       si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
+       si_nir_adjust_driver_locations(sel->nir);
+
        sel->type = sel->info.processor;
        p_atomic_inc(&sscreen->num_shaders_created);
        si_get_active_slot_masks(&sel->info,
@@ -3304,7 +3272,6 @@ void si_destroy_shader_selector(struct si_context *sctx,
 
        util_queue_fence_destroy(&sel->ready);
        simple_mtx_destroy(&sel->mutex);
-       free(sel->tokens);
        ralloc_free(sel->nir);
        free(sel->nir_binary);
        free(sel);
index ffec877082398d1e08cb011898da2dd2f455fef5..e5f333942b71a320edddbc906800c6de94e55614 100644 (file)
@@ -552,12 +552,6 @@ TODO: document the other workarounds.
         <application name="Rocket League" executable="RocketLeague">
             <option name="radeonsi_zerovram" value="true" />
         </application>
-        <application name="Civilization 6" executable="Civ6">
-            <option name="radeonsi_enable_nir" value="true"/>
-        </application>
-        <application name="Civilization 6" executable="Civ6Sub">
-            <option name="radeonsi_enable_nir" value="true"/>
-        </application>
         <application name="DiRT Rally" executable="DirtRally">
             <option name="radeonsi_prim_restart_tri_strips_only" value="true"/>
         </application>