amd: Move all amd/common code that depends on LLVM to amd/llvm.
author Timur Kristóf <timur.kristof@gmail.com>
Fri, 27 Sep 2019 08:29:51 +0000 (10:29 +0200)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 8 Oct 2019 00:44:08 +0000 (00:44 +0000)
This commit is a step towards the goal of being able to build RADV
without LLVM. In the future we would like to offer the option to
use RADV solely with ACO. There is still a need for the common AMD
code located in amd/common but the LLVM specific parts need to be
separated.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Acked-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
33 files changed:
src/amd/Android.common.mk
src/amd/Makefile.sources
src/amd/common/ac_llvm_build.c [deleted file]
src/amd/common/ac_llvm_build.h [deleted file]
src/amd/common/ac_llvm_cull.c [deleted file]
src/amd/common/ac_llvm_cull.h [deleted file]
src/amd/common/ac_llvm_helper.cpp [deleted file]
src/amd/common/ac_llvm_util.c [deleted file]
src/amd/common/ac_llvm_util.h [deleted file]
src/amd/common/ac_nir_to_llvm.c [deleted file]
src/amd/common/ac_nir_to_llvm.h [deleted file]
src/amd/common/ac_shader_abi.h [deleted file]
src/amd/common/meson.build
src/amd/compiler/meson.build
src/amd/llvm/ac_llvm_build.c [new file with mode: 0644]
src/amd/llvm/ac_llvm_build.h [new file with mode: 0644]
src/amd/llvm/ac_llvm_cull.c [new file with mode: 0644]
src/amd/llvm/ac_llvm_cull.h [new file with mode: 0644]
src/amd/llvm/ac_llvm_helper.cpp [new file with mode: 0644]
src/amd/llvm/ac_llvm_util.c [new file with mode: 0644]
src/amd/llvm/ac_llvm_util.h [new file with mode: 0644]
src/amd/llvm/ac_nir_to_llvm.c [new file with mode: 0644]
src/amd/llvm/ac_nir_to_llvm.h [new file with mode: 0644]
src/amd/llvm/ac_shader_abi.h [new file with mode: 0644]
src/amd/llvm/meson.build [new file with mode: 0644]
src/amd/meson.build
src/amd/vulkan/meson.build
src/gallium/drivers/r600/Android.mk
src/gallium/drivers/radeonsi/Android.mk
src/gallium/drivers/radeonsi/meson.build
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
src/gallium/winsys/amdgpu/drm/meson.build
src/meson.build

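diff --git a/src/amd/Android.common.mk b/src/amd/Android.common.mk
index d5a266215f02af1db424b00a538c0216c13e6749..f77aa79c7d86e5af789990a8cbcf119fe8d6e385 100644 (file)
--- a/src/amd/Android.common.mk
+++ b/src/amd/Android.common.mk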
@@ -30,9 +30,8 @@ LOCAL_MODULE := libmesa_amd_common
 
 LOCAL_SRC_FILES := \
        $(AMD_COMMON_FILES) \
-       $(AMD_COMPILER_FILES) \
-       $(AMD_DEBUG_FILES) \
-       $(AMD_NIR_FILES)
+       $(AMD_COMMON_LLVM_FILES) \
+       $(AMD_DEBUG_FILES)
 
 LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU   # instructs LLVM to declare LLVMInitializeAMDGPU* functions
 
@@ -72,6 +71,7 @@ LOCAL_C_INCLUDES := \
        $(MESA_TOP)/include \
        $(MESA_TOP)/src \
        $(MESA_TOP)/src/amd/common \
+       $(MESA_TOP)/src/amd/llvm \
        $(MESA_TOP)/src/compiler \
        $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir \
        $(MESA_TOP)/src/gallium/include \
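diff --git a/src/amd/Makefile.sources b/src/amd/Makefile.sources
index 22f71bf17581c1cbcce722d9b18f806ae6220d68..245ce1042ee971138bf5f7e06230790aec8e822a 100644 (file)
--- a/src/amd/Makefile.sources
+++ b/src/amd/Makefile.sources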
@@ -36,33 +36,30 @@ ADDRLIB_FILES = \
        addrlib/src/r800/siaddrlib.cpp \
        addrlib/src/r800/siaddrlib.h
 
-AMD_COMPILER_FILES = \
+AMD_COMMON_FILES = \
        common/ac_binary.c \
        common/ac_binary.h \
        common/ac_exp_param.h \
-       common/ac_llvm_build.c \
-       common/ac_llvm_build.h \
-       common/ac_llvm_cull.c \
-       common/ac_llvm_cull.h \
-       common/ac_llvm_helper.cpp \
-       common/ac_llvm_util.c \
-       common/ac_llvm_util.h \
+       common/ac_gpu_info.c \
+       common/ac_gpu_info.h \
+       common/ac_surface.c \
+       common/ac_surface.h \
        common/ac_rtld.c \
        common/ac_rtld.h \
-       common/ac_shader_abi.h \
        common/ac_shader_util.c \
        common/ac_shader_util.h
 
-
-AMD_NIR_FILES = \
-       common/ac_nir_to_llvm.c \
-       common/ac_nir_to_llvm.h
-
-AMD_COMMON_FILES = \
-       common/ac_gpu_info.c \
-       common/ac_gpu_info.h \
-       common/ac_surface.c \
-       common/ac_surface.h
+AMD_COMMON_LLVM_FILES = \
+       llvm/ac_llvm_build.c \
+       llvm/ac_llvm_build.h \
+       llvm/ac_llvm_cull.c \
+       llvm/ac_llvm_cull.h \
+       llvm/ac_llvm_helper.cpp \
+       llvm/ac_llvm_util.c \
+       llvm/ac_llvm_util.h \
+       llvm/ac_shader_abi.h \
+       llvm/ac_nir_to_llvm.c \
+       llvm/ac_nir_to_llvm.h
 
 AMD_DEBUG_FILES = \
        common/ac_debug.c \
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
deleted file mode 100644 (file)
index cda2daa..0000000
--- a/src/amd/common/ac_llvm_build.c
+++ /dev/null
@@ -1,4478 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
-#include "ac_llvm_build.h"
-
-#include <llvm-c/Core.h>
-#include <llvm/Config/llvm-config.h>
-
-#include "c11/threads.h"
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "ac_llvm_util.h"
-#include "ac_shader_util.h"
-#include "ac_exp_param.h"
-#include "util/bitscan.h"
-#include "util/macros.h"
-#include "util/u_atomic.h"
-#include "util/u_math.h"
-#include "sid.h"
-
-#include "shader_enums.h"
-
-#define AC_LLVM_INITIAL_CF_DEPTH 4
-
-/* Data for if/else/endif and bgnloop/endloop control flow structures.
- */
-struct ac_llvm_flow {
-       /* Loop exit or next part of if/else/endif. */
-       LLVMBasicBlockRef next_block;
-       LLVMBasicBlockRef loop_entry_block;
-};
-
-/* Initialize module-independent parts of the context.
- *
- * The caller is responsible for initializing ctx::module and ctx::builder.
- */
-void
-ac_llvm_context_init(struct ac_llvm_context *ctx,
-                    struct ac_llvm_compiler *compiler,
-                    enum chip_class chip_class, enum radeon_family family,
-                    enum ac_float_mode float_mode, unsigned wave_size,
-                    unsigned ballot_mask_bits)
-{
-       LLVMValueRef args[1];
-
-       ctx->context = LLVMContextCreate();
-
-       ctx->chip_class = chip_class;
-       ctx->family = family;
-       ctx->wave_size = wave_size;
-       ctx->ballot_mask_bits = ballot_mask_bits;
-       ctx->module = ac_create_module(wave_size == 32 ? compiler->tm_wave32
-                                                      : compiler->tm,
-                                      ctx->context);
-       ctx->builder = ac_create_builder(ctx->context, float_mode);
-
-       ctx->voidt = LLVMVoidTypeInContext(ctx->context);
-       ctx->i1 = LLVMInt1TypeInContext(ctx->context);
-       ctx->i8 = LLVMInt8TypeInContext(ctx->context);
-       ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
-       ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
-       ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
-       ctx->intptr = ctx->i32;
-       ctx->f16 = LLVMHalfTypeInContext(ctx->context);
-       ctx->f32 = LLVMFloatTypeInContext(ctx->context);
-       ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
-       ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
-       ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
-       ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
-       ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
-       ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
-       ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
-       ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
-       ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
-       ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
-       ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits);
-
-       ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
-       ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
-       ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
-       ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
-       ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
-       ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
-       ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
-       ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
-       ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
-       ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
-       ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
-       ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
-       ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
-       ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0);
-
-       ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
-       ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
-
-       ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
-                                                    "range", 5);
-
-       ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
-                                                              "invariant.load", 14);
-
-       ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
-
-       args[0] = LLVMConstReal(ctx->f32, 2.5);
-       ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
-
-       ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
-                                                       "amdgpu.uniform", 14);
-
-       ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
-       ctx->flow = calloc(1, sizeof(*ctx->flow));
-}
-
-void
-ac_llvm_context_dispose(struct ac_llvm_context *ctx)
-{
-       free(ctx->flow->stack);
-       free(ctx->flow);
-       ctx->flow = NULL;
-}
-
-int
-ac_get_llvm_num_components(LLVMValueRef value)
-{
-       LLVMTypeRef type = LLVMTypeOf(value);
-       unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
-                                     ? LLVMGetVectorSize(type)
-                                     : 1;
-       return num_components;
-}
-
-LLVMValueRef
-ac_llvm_extract_elem(struct ac_llvm_context *ac,
-                    LLVMValueRef value,
-                    int index)
-{
-       if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
-               assert(index == 0);
-               return value;
-       }
-
-       return LLVMBuildExtractElement(ac->builder, value,
-                                      LLVMConstInt(ac->i32, index, false), "");
-}
-
-int
-ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
-{
-       if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
-               type = LLVMGetElementType(type);
-
-       if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
-               return LLVMGetIntTypeWidth(type);
-
-       if (type == ctx->f16)
-               return 16;
-       if (type == ctx->f32)
-               return 32;
-       if (type == ctx->f64)
-               return 64;
-
-       unreachable("Unhandled type kind in get_elem_bits");
-}
-
-unsigned
-ac_get_type_size(LLVMTypeRef type)
-{
-       LLVMTypeKind kind = LLVMGetTypeKind(type);
-
-       switch (kind) {
-       case LLVMIntegerTypeKind:
-               return LLVMGetIntTypeWidth(type) / 8;
-       case LLVMHalfTypeKind:
-               return 2;
-       case LLVMFloatTypeKind:
-               return 4;
-       case LLVMDoubleTypeKind:
-               return 8;
-       case LLVMPointerTypeKind:
-               if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT)
-                       return 4;
-               return 8;
-       case LLVMVectorTypeKind:
-               return LLVMGetVectorSize(type) *
-                      ac_get_type_size(LLVMGetElementType(type));
-       case LLVMArrayTypeKind:
-               return LLVMGetArrayLength(type) *
-                      ac_get_type_size(LLVMGetElementType(type));
-       default:
-               assert(0);
-               return 0;
-       }
-}
-
-static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
-       if (t == ctx->i8)
-               return ctx->i8;
-       else if (t == ctx->f16 || t == ctx->i16)
-               return ctx->i16;
-       else if (t == ctx->f32 || t == ctx->i32)
-               return ctx->i32;
-       else if (t == ctx->f64 || t == ctx->i64)
-               return ctx->i64;
-       else
-               unreachable("Unhandled integer size");
-}
-
-LLVMTypeRef
-ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
-       if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
-               LLVMTypeRef elem_type = LLVMGetElementType(t);
-               return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
-                                     LLVMGetVectorSize(t));
-       }
-       if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) {
-               switch (LLVMGetPointerAddressSpace(t)) {
-               case AC_ADDR_SPACE_GLOBAL:
-                       return ctx->i64;
-               case AC_ADDR_SPACE_LDS:
-                       return ctx->i32;
-               default:
-                       unreachable("unhandled address space");
-               }
-       }
-       return to_integer_type_scalar(ctx, t);
-}
-
-LLVMValueRef
-ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
-{
-       LLVMTypeRef type = LLVMTypeOf(v);
-       if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
-               return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), "");
-       }
-       return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
-}
-
-LLVMValueRef
-ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v)
-{
-       LLVMTypeRef type = LLVMTypeOf(v);
-       if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
-               return v;
-       return ac_to_integer(ctx, v);
-}
-
-static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
-       if (t == ctx->i8)
-               return ctx->i8;
-       else if (t == ctx->i16 || t == ctx->f16)
-               return ctx->f16;
-       else if (t == ctx->i32 || t == ctx->f32)
-               return ctx->f32;
-       else if (t == ctx->i64 || t == ctx->f64)
-               return ctx->f64;
-       else
-               unreachable("Unhandled float size");
-}
-
-LLVMTypeRef
-ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
-       if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
-               LLVMTypeRef elem_type = LLVMGetElementType(t);
-               return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
-                                     LLVMGetVectorSize(t));
-       }
-       return to_float_type_scalar(ctx, t);
-}
-
-LLVMValueRef
-ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
-{
-       LLVMTypeRef type = LLVMTypeOf(v);
-       return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), "");
-}
-
-
-LLVMValueRef
-ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
-                  LLVMTypeRef return_type, LLVMValueRef *params,
-                  unsigned param_count, unsigned attrib_mask)
-{
-       LLVMValueRef function, call;
-       bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
-
-       function = LLVMGetNamedFunction(ctx->module, name);
-       if (!function) {
-               LLVMTypeRef param_types[32], function_type;
-               unsigned i;
-
-               assert(param_count <= 32);
-
-               for (i = 0; i < param_count; ++i) {
-                       assert(params[i]);
-                       param_types[i] = LLVMTypeOf(params[i]);
-               }
-               function_type =
-                   LLVMFunctionType(return_type, param_types, param_count, 0);
-               function = LLVMAddFunction(ctx->module, name, function_type);
-
-               LLVMSetFunctionCallConv(function, LLVMCCallConv);
-               LLVMSetLinkage(function, LLVMExternalLinkage);
-
-               if (!set_callsite_attrs)
-                       ac_add_func_attributes(ctx->context, function, attrib_mask);
-       }
-
-       call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
-       if (set_callsite_attrs)
-               ac_add_func_attributes(ctx->context, call, attrib_mask);
-       return call;
-}
-
-/**
- * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
- * intrinsic names).
- */
-void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
-{
-       LLVMTypeRef elem_type = type;
-
-       assert(bufsize >= 8);
-
-       if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
-               int ret = snprintf(buf, bufsize, "v%u",
-                                       LLVMGetVectorSize(type));
-               if (ret < 0) {
-                       char *type_name = LLVMPrintTypeToString(type);
-                       fprintf(stderr, "Error building type name for: %s\n",
-                               type_name);
-                       LLVMDisposeMessage(type_name);
-                       return;
-               }
-               elem_type = LLVMGetElementType(type);
-               buf += ret;
-               bufsize -= ret;
-       }
-       switch (LLVMGetTypeKind(elem_type)) {
-       default: break;
-       case LLVMIntegerTypeKind:
-               snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
-               break;
-       case LLVMHalfTypeKind:
-               snprintf(buf, bufsize, "f16");
-               break;
-       case LLVMFloatTypeKind:
-               snprintf(buf, bufsize, "f32");
-               break;
-       case LLVMDoubleTypeKind:
-               snprintf(buf, bufsize, "f64");
-               break;
-       }
-}
-
-/**
- * Helper function that builds an LLVM IR PHI node and immediately adds
- * incoming edges.
- */
-LLVMValueRef
-ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
-            unsigned count_incoming, LLVMValueRef *values,
-            LLVMBasicBlockRef *blocks)
-{
-       LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
-       LLVMAddIncoming(phi, values, blocks, count_incoming);
-       return phi;
-}
-
-void ac_build_s_barrier(struct ac_llvm_context *ctx)
-{
-       ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL,
-                          0, AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* Prevent optimizations (at least of memory accesses) across the current
- * point in the program by emitting empty inline assembly that is marked as
- * having side effects.
- *
- * Optionally, a value can be passed through the inline assembly to prevent
- * LLVM from hoisting calls to ReadNone functions.
- */
-void
-ac_build_optimization_barrier(struct ac_llvm_context *ctx,
-                             LLVMValueRef *pvgpr)
-{
-       static int counter = 0;
-
-       LLVMBuilderRef builder = ctx->builder;
-       char code[16];
-
-       snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
-
-       if (!pvgpr) {
-               LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
-               LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
-               LLVMBuildCall(builder, inlineasm, NULL, 0, "");
-       } else {
-               LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
-               LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
-               LLVMValueRef vgpr = *pvgpr;
-               LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
-               unsigned vgpr_size = ac_get_type_size(vgpr_type);
-               LLVMValueRef vgpr0;
-
-               assert(vgpr_size % 4 == 0);
-
-               vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
-               vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
-               vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
-               vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
-               vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
-
-               *pvgpr = vgpr;
-       }
-}
-
-LLVMValueRef
-ac_build_shader_clock(struct ac_llvm_context *ctx)
-{
-       const char *intr = LLVM_VERSION_MAJOR >= 9 && ctx->chip_class >= GFX8 ?
-                               "llvm.amdgcn.s.memrealtime" : "llvm.readcyclecounter";
-       LLVMValueRef tmp = ac_build_intrinsic(ctx, intr, ctx->i64, NULL, 0, 0);
-       return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, "");
-}
-
-LLVMValueRef
-ac_build_ballot(struct ac_llvm_context *ctx,
-               LLVMValueRef value)
-{
-       const char *name;
-
-       if (LLVM_VERSION_MAJOR >= 9) {
-               if (ctx->wave_size == 64)
-                       name = "llvm.amdgcn.icmp.i64.i32";
-               else
-                       name = "llvm.amdgcn.icmp.i32.i32";
-       } else {
-               name = "llvm.amdgcn.icmp.i32";
-       }
-       LLVMValueRef args[3] = {
-               value,
-               ctx->i32_0,
-               LLVMConstInt(ctx->i32, LLVMIntNE, 0)
-       };
-
-       /* We currently have no other way to prevent LLVM from lifting the icmp
-        * calls to a dominating basic block.
-        */
-       ac_build_optimization_barrier(ctx, &args[0]);
-
-       args[0] = ac_to_integer(ctx, args[0]);
-
-       return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
-                                 AC_FUNC_ATTR_NOUNWIND |
-                                 AC_FUNC_ATTR_READNONE |
-                                 AC_FUNC_ATTR_CONVERGENT);
-}
-
-LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
-                                LLVMValueRef value)
-{
-       const char *name = LLVM_VERSION_MAJOR >= 9 ? "llvm.amdgcn.icmp.i64.i1" : "llvm.amdgcn.icmp.i1";
-       LLVMValueRef args[3] = {
-               value,
-               ctx->i1false,
-               LLVMConstInt(ctx->i32, LLVMIntNE, 0),
-       };
-
-       return ac_build_intrinsic(ctx, name, ctx->i64, args, 3,
-                                 AC_FUNC_ATTR_NOUNWIND |
-                                 AC_FUNC_ATTR_READNONE |
-                                 AC_FUNC_ATTR_CONVERGENT);
-}
-
-LLVMValueRef
-ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
-{
-       LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
-       LLVMValueRef vote_set = ac_build_ballot(ctx, value);
-       return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
-}
-
-LLVMValueRef
-ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
-{
-       LLVMValueRef vote_set = ac_build_ballot(ctx, value);
-       return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
-                            LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
-}
-
-LLVMValueRef
-ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
-{
-       LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
-       LLVMValueRef vote_set = ac_build_ballot(ctx, value);
-
-       LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
-                                        vote_set, active_set, "");
-       LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
-                                         vote_set,
-                                         LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
-       return LLVMBuildOr(ctx->builder, all, none, "");
-}
-
-LLVMValueRef
-ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
-                              unsigned value_count, unsigned component)
-{
-       LLVMValueRef vec = NULL;
-
-       if (value_count == 1) {
-               return values[component];
-       } else if (!value_count)
-               unreachable("value_count is 0");
-
-       for (unsigned i = component; i < value_count + component; i++) {
-               LLVMValueRef value = values[i];
-
-               if (i == component)
-                       vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
-               LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
-               vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
-       }
-       return vec;
-}
-
-LLVMValueRef
-ac_build_gather_values_extended(struct ac_llvm_context *ctx,
-                               LLVMValueRef *values,
-                               unsigned value_count,
-                               unsigned value_stride,
-                               bool load,
-                               bool always_vector)
-{
-       LLVMBuilderRef builder = ctx->builder;
-       LLVMValueRef vec = NULL;
-       unsigned i;
-
-       if (value_count == 1 && !always_vector) {
-               if (load)
-                       return LLVMBuildLoad(builder, values[0], "");
-               return values[0];
-       } else if (!value_count)
-               unreachable("value_count is 0");
-
-       for (i = 0; i < value_count; i++) {
-               LLVMValueRef value = values[i * value_stride];
-               if (load)
-                       value = LLVMBuildLoad(builder, value, "");
-
-               if (!i)
-                       vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
-               LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
-               vec = LLVMBuildInsertElement(builder, vec, value, index, "");
-       }
-       return vec;
-}
-
-LLVMValueRef
-ac_build_gather_values(struct ac_llvm_context *ctx,
-                      LLVMValueRef *values,
-                      unsigned value_count)
-{
-       return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
-}
-
-/* Expand a scalar or vector to <dst_channels x type> by filling the remaining
- * channels with undef. Extract at most src_channels components from the input.
- */
-static LLVMValueRef
-ac_build_expand(struct ac_llvm_context *ctx,
-               LLVMValueRef value,
-               unsigned src_channels,
-               unsigned dst_channels)
-{
-       LLVMTypeRef elemtype;
-       LLVMValueRef chan[dst_channels];
-
-       if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
-               unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
-
-               if (src_channels == dst_channels && vec_size == dst_channels)
-                       return value;
-
-               src_channels = MIN2(src_channels, vec_size);
-
-               for (unsigned i = 0; i < src_channels; i++)
-                       chan[i] = ac_llvm_extract_elem(ctx, value, i);
-
-               elemtype = LLVMGetElementType(LLVMTypeOf(value));
-       } else {
-               if (src_channels) {
-                       assert(src_channels == 1);
-                       chan[0] = value;
-               }
-               elemtype = LLVMTypeOf(value);
-       }
-
-       for (unsigned i = src_channels; i < dst_channels; i++)
-               chan[i] = LLVMGetUndef(elemtype);
-
-       return ac_build_gather_values(ctx, chan, dst_channels);
-}
-
-/* Extract components [start, start + channels) from a vector.
- */
-LLVMValueRef
-ac_extract_components(struct ac_llvm_context *ctx,
-                     LLVMValueRef value,
-                     unsigned start,
-                     unsigned channels)
-{
-       LLVMValueRef chan[channels];
-
-       for (unsigned i = 0; i < channels; i++)
-               chan[i] = ac_llvm_extract_elem(ctx, value, i + start);
-
-       return ac_build_gather_values(ctx, chan, channels);
-}
-
-/* Expand a scalar or vector to <4 x type> by filling the remaining channels
- * with undef. Extract at most num_channels components from the input.
- */
-LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
-                                    LLVMValueRef value,
-                                    unsigned num_channels)
-{
-       return ac_build_expand(ctx, value, num_channels, 4);
-}
-
-LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value)
-{
-       unsigned type_size = ac_get_type_size(LLVMTypeOf(value));
-       const char *name;
-
-       if (type_size == 2)
-               name = "llvm.rint.f16";
-       else if (type_size == 4)
-               name = "llvm.rint.f32";
-       else
-               name = "llvm.rint.f64";
-
-       return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1,
-                                 AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef
-ac_build_fdiv(struct ac_llvm_context *ctx,
-             LLVMValueRef num,
-             LLVMValueRef den)
-{
-       /* If we do (num / den), LLVM >= 7.0 does:
-        *    return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
-        *
-        * If we do (num * (1 / den)), LLVM does:
-        *    return num * v_rcp_f32(den);
-        */
-       LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
-       LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
-       LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
-
-       /* Use v_rcp_f32 instead of precise division. */
-       if (!LLVMIsConstant(ret))
-               LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
-       return ret;
-}
-
-/* See fast_idiv_by_const.h. */
-/* Set: increment = util_fast_udiv_info::increment ? multiplier : 0; */
-LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
-                               LLVMValueRef num,
-                               LLVMValueRef multiplier,
-                               LLVMValueRef pre_shift,
-                               LLVMValueRef post_shift,
-                               LLVMValueRef increment)
-{
-       LLVMBuilderRef builder = ctx->builder;
-
-       num = LLVMBuildLShr(builder, num, pre_shift, "");
-       num = LLVMBuildMul(builder,
-                          LLVMBuildZExt(builder, num, ctx->i64, ""),
-                          LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
-       num = LLVMBuildAdd(builder, num,
-                          LLVMBuildZExt(builder, increment, ctx->i64, ""), "");
-       num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
-       num = LLVMBuildTrunc(builder, num, ctx->i32, "");
-       return LLVMBuildLShr(builder, num, post_shift, "");
-}
-
-/* See fast_idiv_by_const.h. */
-/* If num != UINT_MAX, this more efficient version can be used. */
-/* Set: increment = util_fast_udiv_info::increment; */
-LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
-                                   LLVMValueRef num,
-                                   LLVMValueRef multiplier,
-                                   LLVMValueRef pre_shift,
-                                   LLVMValueRef post_shift,
-                                   LLVMValueRef increment)
-{
-       LLVMBuilderRef builder = ctx->builder;
-
-       num = LLVMBuildLShr(builder, num, pre_shift, "");
-       num = LLVMBuildNUWAdd(builder, num, increment, ""); /* 32-bit add flagged no-unsigned-wrap; presumably why num != UINT_MAX is required */
-       num = LLVMBuildMul(builder,
-                          LLVMBuildZExt(builder, num, ctx->i64, ""),
-                          LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); /* 64-bit product */
-       num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); /* keep the high 32 bits */
-       num = LLVMBuildTrunc(builder, num, ctx->i32, "");
-       return LLVMBuildLShr(builder, num, post_shift, "");
-}
-
-/* See fast_idiv_by_const.h. */
-/* Both operands must fit in 31 bits and the divisor must not be 1. */
-LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
-                                             LLVMValueRef num,
-                                             LLVMValueRef multiplier,
-                                             LLVMValueRef post_shift)
-{
-       LLVMBuilderRef builder = ctx->builder;
-
-       num = LLVMBuildMul(builder, /* no pre-shift and no increment in this variant */
-                          LLVMBuildZExt(builder, num, ctx->i64, ""),
-                          LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
-       num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); /* keep the high 32 bits */
-       num = LLVMBuildTrunc(builder, num, ctx->i32, "");
-       return LLVMBuildLShr(builder, num, post_shift, "");
-}
-
-/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
- * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
- * already multiplied by two. id is the cube face number.
- */
-struct cube_selection_coords {
-       LLVMValueRef stc[2]; /* [0] = sc, [1] = tc */
-       LLVMValueRef ma;     /* major axis, already multiplied by two */
-       LLVMValueRef id;     /* cube face number, as a float value */
-};
-
-static void
-build_cube_intrinsic(struct ac_llvm_context *ctx,
-                    LLVMValueRef in[3],
-                    struct cube_selection_coords *out)
-{ /* Fill *out with the results of the four llvm.amdgcn.cube* intrinsics applied to in[0..2]. */
-       LLVMTypeRef f32 = ctx->f32; /* each intrinsic below is built with an f32 return type */
-
-       out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
-                                        f32, in, 3, AC_FUNC_ATTR_READNONE);
-       out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
-                                        f32, in, 3, AC_FUNC_ATTR_READNONE);
-       out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
-                                    f32, in, 3, AC_FUNC_ATTR_READNONE);
-       out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
-                                    f32, in, 3, AC_FUNC_ATTR_READNONE);
-}
-
-/**
- * Build a manual selection sequence for cube face sc/tc coordinates and
- * major axis vector (multiplied by 2 for consistency) for the given
- * vec3 \p coords, for the face implied by \p selcoords.
- *
- * For the major axis, we always adjust the sign to be in the direction of
- * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
- * the selcoords major axis.
- */
-static void build_cube_select(struct ac_llvm_context *ctx,
-                             const struct cube_selection_coords *selcoords,
-                             const LLVMValueRef *coords,
-                             LLVMValueRef *out_st,
-                             LLVMValueRef *out_ma)
-{
-       LLVMBuilderRef builder = ctx->builder;
-       LLVMTypeRef f32 = LLVMTypeOf(coords[0]); /* constants below use the type of the incoming coords */
-       LLVMValueRef is_ma_positive;
-       LLVMValueRef sgn_ma;
-       LLVMValueRef is_ma_z, is_not_ma_z;
-       LLVMValueRef is_ma_y;
-       LLVMValueRef is_ma_x;
-       LLVMValueRef sgn;
-       LLVMValueRef tmp;
-
-       is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
-               selcoords->ma, LLVMConstReal(f32, 0.0), ""); /* UGE is "unordered or >=", so a NaN ma counts as positive */
-       sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
-               LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
-
-       is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), ""); /* id >= 4 */
-       is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
-       is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
-               LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), ""); /* 2 <= id < 4 */
-       is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), ""); /* id < 2 */
-
-       /* Select sc */
-       tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
-       sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
-               LLVMBuildSelect(builder, is_ma_z, sgn_ma,
-                       LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
-       out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
-
-       /* Select tc */
-       tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
-       sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
-               LLVMConstReal(f32, -1.0), "");
-       out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
-
-       /* Select ma */
-       tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
-               LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
-       tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
-                                ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
-       *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), ""); /* 2 * |selected component| */
-}
-
-void
-ac_prepare_cube_coords(struct ac_llvm_context *ctx,
-                      bool is_deriv, bool is_array, bool is_lod,
-                      LLVMValueRef *coords_arg,
-                      LLVMValueRef *derivs_arg)
-{
-
-       LLVMBuilderRef builder = ctx->builder;
-       struct cube_selection_coords selcoords;
-       LLVMValueRef coords[3];
-       LLVMValueRef invma;
-
-       if (is_array && !is_lod) {
-               LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);
-
-               /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
-                *
-                *    "For Array forms, the array layer used will be
-                *
-                *       max(0, min(d−1, floor(layer+0.5)))
-                *
-                *     where d is the depth of the texture array and layer
-                *     comes from the component indicated in the tables below.
-                *     Workaround for an issue where the layer is taken from a
-                *     helper invocation which happens to fall on a different
-                *     layer due to extrapolation."
-                *
-                * GFX8 and earlier attempt to implement this in hardware by
-                * clamping the value of coords[2] = (8 * layer) + face.
-                * Unfortunately, this means that we end up with the wrong
-                * face when clamping occurs.
-                *
-                * Clamp the layer earlier to work around the issue.
-                */
-               if (ctx->chip_class <= GFX8) {
-                       LLVMValueRef ge0;
-                       ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
-                       tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, ""); /* max(tmp, 0); a NaN tmp also yields 0 (ordered compare) */
-               }
-
-               coords_arg[3] = tmp;
-       }
-
-       build_cube_intrinsic(ctx, coords_arg, &selcoords);
-
-       invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
-                       ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
-       invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); /* invma = 1 / |ma| */
-
-       for (int i = 0; i < 2; ++i)
-               coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
-
-       coords[2] = selcoords.id; /* face number */
-
-       if (is_deriv && derivs_arg) {
-               LLVMValueRef derivs[4];
-               int axis;
-
-               /* Convert cube derivatives to 2D derivatives. */
-               for (axis = 0; axis < 2; axis++) {
-                       LLVMValueRef deriv_st[2];
-                       LLVMValueRef deriv_ma;
-
-                       /* Transform the derivative alongside the texture
-                        * coordinate. Mathematically, the correct formula is
-                        * as follows. Assume we're projecting onto the +Z face
-                        * and denote by dx/dh the derivative of the (original)
-                        * X texture coordinate with respect to horizontal
-                        * window coordinates. The projection onto the +Z face
-                        * plane is:
-                        *
-                        *   f(x,z) = x/z
-                        *
-                        * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
-                        *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
-                        *
-                        * This motivates the implementation below.
-                        *
-                        * Whether this actually gives the expected results for
-                        * apps that might feed in derivatives obtained via
-                        * finite differences is anyone's guess. The OpenGL spec
-                        * seems awfully quiet about how textureGrad for cube
-                        * maps should be handled.
-                        */
-                       build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
-                                         deriv_st, &deriv_ma); /* input: three derivative components per axis */
-
-                       deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
-
-                       for (int i = 0; i < 2; ++i)
-                               derivs[axis * 2 + i] =
-                                       LLVMBuildFSub(builder,
-                                               LLVMBuildFMul(builder, deriv_st[i], invma, ""),
-                                               LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
-               }
-
-               memcpy(derivs_arg, derivs, sizeof(derivs)); /* output: four values (two per axis) overwrite the front of derivs_arg */
-       }
-
-       /* Shift the texture coordinate. This must be applied after the
-        * derivative calculation.
-        */
-       for (int i = 0; i < 2; ++i)
-               coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
-
-       if (is_array) {
-               /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
-               /* coords_arg.w component - array_index for cube arrays */
-               coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]);
-       }
-
-       memcpy(coords_arg, coords, sizeof(coords)); /* overwrites coords_arg[0..2] in place */
-}
-
-
-LLVMValueRef
-ac_build_fs_interp(struct ac_llvm_context *ctx,
-                  LLVMValueRef llvm_chan,
-                  LLVMValueRef attr_number,
-                  LLVMValueRef params,
-                  LLVMValueRef i,
-                  LLVMValueRef j)
-{ /* Emit the two-step interp.p1 / interp.p2 sequence and return the f32 result of p2. */
-       LLVMValueRef args[5];
-       LLVMValueRef p1;
-
-       args[0] = i; /* the first step consumes i */
-       args[1] = llvm_chan;
-       args[2] = attr_number;
-       args[3] = params;
-
-       p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
-                               ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
-
-       args[0] = p1; /* the second step consumes the p1 result and j */
-       args[1] = j;
-       args[2] = llvm_chan;
-       args[3] = attr_number;
-       args[4] = params;
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
-                                 ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef
-ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
-                      LLVMValueRef llvm_chan,
-                      LLVMValueRef attr_number,
-                      LLVMValueRef params,
-                      LLVMValueRef i,
-                      LLVMValueRef j)
-{ /* Same two-step sequence as ac_build_fs_interp(), using the .f16 intrinsics; returns an f16 value. */
-       LLVMValueRef args[6];
-       LLVMValueRef p1;
-
-       args[0] = i;
-       args[1] = llvm_chan;
-       args[2] = attr_number;
-       args[3] = ctx->i1false; /* NOTE(review): extra i1 operand of the f16 variants, presumably the high-half selector; confirm against LLVM */
-       args[4] = params;
-
-       p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
-                               ctx->f32, args, 5, AC_FUNC_ATTR_READNONE); /* the p1 step is still built as f32 */
-
-       args[0] = p1;
-       args[1] = j;
-       args[2] = llvm_chan;
-       args[3] = attr_number;
-       args[4] = ctx->i1false;
-       args[5] = params;
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
-                                 ctx->f16, args, 6, AC_FUNC_ATTR_READNONE); /* only the p2 step yields f16 */
-}
-
-LLVMValueRef
-ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
-                      LLVMValueRef parameter,
-                      LLVMValueRef llvm_chan,
-                      LLVMValueRef attr_number,
-                      LLVMValueRef params)
-{ /* Emit llvm.amdgcn.interp.mov with the four arguments in the order given; returns f32. */
-       LLVMValueRef args[4];
-
-       args[0] = parameter;
-       args[1] = llvm_chan;
-       args[2] = attr_number;
-       args[3] = params;
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov",
-                                 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef
-ac_build_gep_ptr(struct ac_llvm_context *ctx,
-                LLVMValueRef base_ptr,
-                LLVMValueRef index)
-{
-       return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, ""); /* single-index GEP (not inbounds) */
-}
-
-LLVMValueRef
-ac_build_gep0(struct ac_llvm_context *ctx,
-             LLVMValueRef base_ptr,
-             LLVMValueRef index)
-{
-       LLVMValueRef indices[2] = {
-               ctx->i32_0, /* leading zero index, then the element index */
-               index,
-       };
-       return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
-}
-
-LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
-                                 LLVMValueRef index)
-{
-       return LLVMBuildPointerCast(ctx->builder,
-                                   LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""),
-                                   LLVMTypeOf(ptr), ""); /* result is cast back to the type of ptr */
-}
-
-void
-ac_build_indexed_store(struct ac_llvm_context *ctx,
-                      LLVMValueRef base_ptr, LLVMValueRef index,
-                      LLVMValueRef value)
-{
-       LLVMBuildStore(ctx->builder, value,
-                      ac_build_gep0(ctx, base_ptr, index)); /* address is the two-index GEP {0, index} on base_ptr */
-}
-
-/**
- * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
- * It's equivalent to doing a load from &base_ptr[index].
- *
- * \param base_ptr  Where the array starts.
- * \param index     The element index into the array.
- * \param uniform   Whether the base_ptr and index can be assumed to be
- *                  dynamically uniform (i.e. load to an SGPR)
- * \param invariant Whether the load is invariant (no other opcodes affect it)
- * \param no_unsigned_wraparound
- *    For all possible re-associations and re-distributions of an expression
- *    "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
- *    without inbounds in base_ptr), this parameter is true if "addr + offset"
- *    does not result in an unsigned integer wraparound. This is used for
- *    optimal code generation of 32-bit pointer arithmetic.
- *
- *    For example, a 32-bit immediate offset that causes a 32-bit unsigned
- *    integer wraparound can't be an imm offset in s_load_dword, because
- *    the instruction performs "addr + offset" in 64 bits.
- *
- *    Expected usage for bindless textures by chaining GEPs:
- *      // possible unsigned wraparound, don't use InBounds:
- *      ptr1 = LLVMBuildGEP(base_ptr, index);
- *      image = load(ptr1); // becomes "s_load ptr1, 0"
- *
- *      ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
- *      sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
- */
-static LLVMValueRef
-ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
-                    LLVMValueRef index, bool uniform, bool invariant,
-                    bool no_unsigned_wraparound)
-{
-       LLVMValueRef pointer, result;
-
-       if (no_unsigned_wraparound && /* InBounds is only applied to 32-bit constant address space pointers */
-           LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT)
-               pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, &index, 1, "");
-       else
-               pointer = LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
-
-       if (uniform)
-               LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); /* uniform metadata goes on the pointer (GEP) */
-       result = LLVMBuildLoad(ctx->builder, pointer, "");
-       if (invariant)
-               LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); /* invariant metadata goes on the load */
-       return result;
-}
-
-LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
-                          LLVMValueRef index)
-{
-       return ac_build_load_custom(ctx, base_ptr, index, false, false, false); /* uniform, invariant, no_unsigned_wraparound all off */
-}
-
-LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
-                                    LLVMValueRef base_ptr, LLVMValueRef index)
-{
-       return ac_build_load_custom(ctx, base_ptr, index, false, true, false); /* only invariant is set */
-}
-
-/* This assumes that there is no unsigned integer wraparound during the address
- * computation, excluding all GEPs within base_ptr. */
-LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
-                                  LLVMValueRef base_ptr, LLVMValueRef index)
-{
-       return ac_build_load_custom(ctx, base_ptr, index, true, true, true); /* uniform, invariant, no_unsigned_wraparound all on */
-}
-
-/* Uniform, invariant load that may wrap around; see ac_build_load_custom() documentation. */
-LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
-                                  LLVMValueRef base_ptr, LLVMValueRef index)
-{
-       return ac_build_load_custom(ctx, base_ptr, index, true, true, false); /* no_unsigned_wraparound is off */
-}
-
-static unsigned get_load_cache_policy(struct ac_llvm_context *ctx,
-                                     unsigned cache_policy)
-{
-       return cache_policy | /* on GFX10+, a policy that has ac_glc set also gets ac_dlc */
-              (ctx->chip_class >= GFX10 && cache_policy & ac_glc ? ac_dlc : 0);
-}
-
-static void
-ac_build_buffer_store_common(struct ac_llvm_context *ctx,
-                            LLVMValueRef rsrc,
-                            LLVMValueRef data,
-                            LLVMValueRef vindex,
-                            LLVMValueRef voffset,
-                            LLVMValueRef soffset,
-                            unsigned num_channels,
-                            LLVMTypeRef return_channel_type,
-                            unsigned cache_policy,
-                            bool use_format,
-                            bool structurized)
-{ /* Emit an llvm.amdgcn.{struct,raw}.buffer.store[.format].<type> call; NULL vindex/voffset/soffset become 0. */
-       LLVMValueRef args[6];
-       int idx = 0;
-       args[idx++] = data;
-       args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
-       if (structurized)
-               args[idx++] = vindex ? vindex : ctx->i32_0; /* vindex is only passed to the "struct" variant */
-       args[idx++] = voffset ? voffset : ctx->i32_0;
-       args[idx++] = soffset ? soffset : ctx->i32_0;
-       args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
-       unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels; /* name a 3-channel store as vec4 without vec3 support */
-       const char *indexing_kind = structurized ? "struct" : "raw";
-       char name[256], type_name[8];
-
-       LLVMTypeRef type = func > 1 ? LLVMVectorType(return_channel_type, func) : return_channel_type;
-       ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); /* type suffix used in the intrinsic name */
-
-       if (use_format) {
-               snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
-                        indexing_kind, type_name);
-       } else {
-               snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
-                        indexing_kind, type_name);
-       }
-
-       ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
-                          AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
-}
-
-void
-ac_build_buffer_store_format(struct ac_llvm_context *ctx,
-                            LLVMValueRef rsrc,
-                            LLVMValueRef data,
-                            LLVMValueRef vindex,
-                            LLVMValueRef voffset,
-                            unsigned num_channels,
-                            unsigned cache_policy)
-{
-       ac_build_buffer_store_common(ctx, rsrc, data, vindex,
-                                    voffset, NULL, num_channels, /* soffset is NULL, i.e. 0 */
-                                    ctx->f32, cache_policy,
-                                    true, true); /* use_format = true, structurized = true */
-}
-
-/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
- * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
- * or v4i32 (num_channels=3,4).
- */
-void
-ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
-                           LLVMValueRef rsrc,
-                           LLVMValueRef vdata,
-                           unsigned num_channels,
-                           LLVMValueRef voffset,
-                           LLVMValueRef soffset,
-                           unsigned inst_offset,
-                           unsigned cache_policy,
-                           bool swizzle_enable_hint)
-{
-       /* Split 3 channel stores, because only LLVM 9+ support 3-channel
-        * intrinsics. */
-       if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) {
-               LLVMValueRef v[3], v01;
-
-               for (int i = 0; i < 3; i++) {
-                       v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
-                                       LLVMConstInt(ctx->i32, i, 0), "");
-               }
-               v01 = ac_build_gather_values(ctx, v, 2);
-
-               ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
-                                           soffset, inst_offset, cache_policy,
-                                           swizzle_enable_hint);
-               ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
-                                           soffset, inst_offset + 8, /* third channel is stored 8 past the first two */
-                                           cache_policy,
-                                           swizzle_enable_hint);
-               return;
-       }
-
-       /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
-        * (voffset is swizzled, but soffset isn't swizzled).
-        * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
-        */
-       if (!swizzle_enable_hint) {
-               LLVMValueRef offset = soffset;
-
-               if (inst_offset) /* NOTE(review): soffset is used unchecked here, so it must be non-NULL when inst_offset != 0 */
-                       offset = LLVMBuildAdd(ctx->builder, offset,
-                                             LLVMConstInt(ctx->i32, inst_offset, 0), "");
-
-               ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata),
-                                            ctx->i32_0, voffset, offset,
-                                            num_channels, ctx->f32,
-                                            cache_policy, false, false); /* use_format = false, structurized = false (raw) */
-               return;
-       }
-
-       static const unsigned dfmts[] = {
-               V_008F0C_BUF_DATA_FORMAT_32,
-               V_008F0C_BUF_DATA_FORMAT_32_32,
-               V_008F0C_BUF_DATA_FORMAT_32_32_32,
-               V_008F0C_BUF_DATA_FORMAT_32_32_32_32
-       };
-       unsigned dfmt = dfmts[num_channels - 1]; /* indexed by num_channels - 1, so num_channels must be 1..4 */
-       unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-       LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
-
-       ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
-                                  immoffset, num_channels, dfmt, nfmt, cache_policy);
-}
-
-static LLVMValueRef
-ac_build_buffer_load_common(struct ac_llvm_context *ctx,
-                           LLVMValueRef rsrc,
-                           LLVMValueRef vindex,
-                           LLVMValueRef voffset,
-                           LLVMValueRef soffset,
-                           unsigned num_channels,
-                           LLVMTypeRef channel_type,
-                           unsigned cache_policy,
-                           bool can_speculate,
-                           bool use_format,
-                           bool structurized)
-{ /* Emit an llvm.amdgcn.{struct,raw}.buffer.load[.format].<type> call; NULL vindex/voffset/soffset become 0. */
-       LLVMValueRef args[5];
-       int idx = 0;
-       args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
-       if (structurized)
-               args[idx++] = vindex ? vindex : ctx->i32_0; /* vindex is only passed to the "struct" variant */
-       args[idx++] = voffset ? voffset : ctx->i32_0;
-       args[idx++] = soffset ? soffset : ctx->i32_0;
-       args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
-       unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels; /* load vec4 for 3 channels without vec3 support */
-       const char *indexing_kind = structurized ? "struct" : "raw";
-       char name[256], type_name[8];
-
-       LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type; /* also the return type of the call */
-       ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
-       if (use_format) {
-               snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s",
-                        indexing_kind, type_name);
-       } else {
-               snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s",
-                        indexing_kind, type_name);
-       }
-
-       return ac_build_intrinsic(ctx, name, type, args, idx,
-                                 ac_get_load_intr_attribs(can_speculate));
-}
-
-LLVMValueRef
-ac_build_buffer_load(struct ac_llvm_context *ctx,
-                    LLVMValueRef rsrc,
-                    int num_channels,
-                    LLVMValueRef vindex,
-                    LLVMValueRef voffset,
-                    LLVMValueRef soffset,
-                    unsigned inst_offset,
-                    unsigned cache_policy,
-                    bool can_speculate,
-                    bool allow_smem)
-{
-       LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0); /* inst_offset + voffset + soffset are folded into one offset */
-       if (voffset)
-               offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
-       if (soffset)
-               offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
-
-       if (allow_smem && !(cache_policy & ac_slc) && /* s.buffer.load path: never with slc, with glc only on GFX8+ */
-           (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) {
-               assert(vindex == NULL);
-
-               LLVMValueRef result[8];
-
-               for (int i = 0; i < num_channels; i++) { /* one scalar f32 load per channel */
-                       if (i) {
-                               offset = LLVMBuildAdd(ctx->builder, offset,
-                                                     LLVMConstInt(ctx->i32, 4, 0), ""); /* advance by 4 for each further channel */
-                       }
-                       LLVMValueRef args[3] = {
-                               rsrc,
-                               offset,
-                               LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0),
-                       };
-                       result[i] = ac_build_intrinsic(ctx,
-                                                      "llvm.amdgcn.s.buffer.load.f32",
-                                                      ctx->f32, args, 3,
-                                                      AC_FUNC_ATTR_READNONE);
-               }
-               if (num_channels == 1)
-                       return result[0];
-
-               if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false))
-                       result[num_channels++] = LLVMGetUndef(ctx->f32); /* pad to 4 channels with undef */
-               return ac_build_gather_values(ctx, result, num_channels);
-       }
-
-       return ac_build_buffer_load_common(ctx, rsrc, vindex,
-                                          offset, ctx->i32_0, /* combined offset goes in as voffset, soffset = 0 */
-                                          num_channels, ctx->f32,
-                                          cache_policy,
-                                          can_speculate, false, false);
-}
-
-LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
-                                        LLVMValueRef rsrc,
-                                        LLVMValueRef vindex,
-                                        LLVMValueRef voffset,
-                                        unsigned num_channels,
-                                        unsigned cache_policy,
-                                        bool can_speculate)
-{
-       return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
-                                          ctx->i32_0, num_channels, ctx->f32, /* soffset = 0, f32 channels */
-                                          cache_policy, can_speculate,
-                                          true, true); /* use_format = true, structurized = true */
-}
-
-static LLVMValueRef
-ac_build_tbuffer_load(struct ac_llvm_context *ctx,
-                           LLVMValueRef rsrc,
-                           LLVMValueRef vindex,
-                           LLVMValueRef voffset,
-                           LLVMValueRef soffset,
-                           LLVMValueRef immoffset,
-                           unsigned num_channels,
-                           unsigned dfmt,
-                           unsigned nfmt,
-                           unsigned cache_policy,
-                           bool can_speculate,
-                           bool structurized)
-{
-       voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); /* immoffset is folded into voffset; both are used unchecked */
-
-       LLVMValueRef args[6];
-       int idx = 0;
-       args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
-       if (structurized)
-               args[idx++] = vindex ? vindex : ctx->i32_0; /* vindex is only passed to the "struct" variant */
-       args[idx++] = voffset ? voffset : ctx->i32_0; /* voffset now holds the add result above, so this fallback looks unreachable */
-       args[idx++] = soffset ? soffset : ctx->i32_0;
-       args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
-       args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
-       unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels; /* load vec4 for 3 channels without vec3 support */
-       const char *indexing_kind = structurized ? "struct" : "raw";
-       char name[256], type_name[8];
-
-       LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32; /* result is always i32-based */
-       ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
-       snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s",
-                indexing_kind, type_name);
-
-       return ac_build_intrinsic(ctx, name, type, args, idx,
-                                 ac_get_load_intr_attribs(can_speculate));
-}
-
-LLVMValueRef
-ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
-                            LLVMValueRef rsrc,
-                            LLVMValueRef vindex,
-                            LLVMValueRef voffset,
-                            LLVMValueRef soffset,
-                            LLVMValueRef immoffset,
-                            unsigned num_channels,
-                            unsigned dfmt,
-                            unsigned nfmt,
-                            unsigned cache_policy,
-                            bool can_speculate)
-{
-       return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset,
-                                    immoffset, num_channels, dfmt, nfmt,
-                                    cache_policy, can_speculate, true); /* structurized = true */
-}
-
-LLVMValueRef
-ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
-                         LLVMValueRef rsrc,
-                         LLVMValueRef voffset,
-                         LLVMValueRef soffset,
-                         LLVMValueRef immoffset,
-                         unsigned num_channels,
-                         unsigned dfmt,
-                         unsigned nfmt,
-                         unsigned cache_policy,
-                         bool can_speculate)
-{
-       return ac_build_tbuffer_load(ctx, rsrc, NULL, voffset, soffset, /* no vindex for the raw variant */
-                                    immoffset, num_channels, dfmt, nfmt,
-                                    cache_policy, can_speculate, false); /* structurized = false */
-}
-
-LLVMValueRef
-ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
-                           LLVMValueRef rsrc,
-                           LLVMValueRef voffset,
-                           LLVMValueRef soffset,
-                           LLVMValueRef immoffset,
-                           unsigned cache_policy)
-{ /* Load a single 16-bit value; the result is i16 on both paths below. */
-       LLVMValueRef res;
-
-       if (LLVM_VERSION_MAJOR >= 9) {
-               voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
-
-               /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
-               res = ac_build_buffer_load_common(ctx, rsrc, NULL,
-                                                 voffset, soffset,
-                                                 1, ctx->i16, cache_policy,
-                                                 false, false, false); /* can_speculate, use_format, structurized all false */
-       } else {
-               unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
-               unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
-               res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
-                                               immoffset, 1, dfmt, nfmt, cache_policy,
-                                               false);
-
-               res = LLVMBuildTrunc(ctx->builder, res, ctx->i16, ""); /* narrow the i32 tbuffer result to i16 */
-       }
-
-       return res;
-}
-
-LLVMValueRef
-ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
-                          LLVMValueRef rsrc,
-                          LLVMValueRef voffset,
-                          LLVMValueRef soffset,
-                          LLVMValueRef immoffset,
-                          unsigned cache_policy)
-{      /* Load one 8-bit value from a buffer.
-        *
-        * With LLVM 9 or newer, immoffset is folded into voffset and a
-        * single-channel load with channel type ctx->i8 is requested from
-        * ac_build_buffer_load_common() (NULL vindex).  With older LLVM, a
-        * one-channel raw typed-buffer load using the 8-bit data format and
-        * the UINT number format is issued and its result truncated to i8.
-        */
-       LLVMValueRef res;
-
-       if (LLVM_VERSION_MAJOR >= 9) {
-               voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
-
-               /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
-               res = ac_build_buffer_load_common(ctx, rsrc, NULL,
-                                                 voffset, soffset,
-                                                 1, ctx->i8, cache_policy,
-                                                 false, false, false);
-       } else {
-               unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
-               unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
-               res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
-                                               immoffset, 1, dfmt, nfmt, cache_policy,
-                                               false);
-
-               res = LLVMBuildTrunc(ctx->builder, res, ctx->i8, "");
-       }
-
-       return res;
-}
-
-/**
- * Convert an 11- or 10-bit unsigned floating point number to an f32.
- *
- * The input exponent is expected to be biased analogous to IEEE-754, i.e. by
- * 2^(exp_bits-1) - 1 (as defined in OpenGL and other graphics APIs).
- *
- * Three candidate bit patterns are built unconditionally (normal, nan/inf and
- * denormal) and one of them is picked by a chain of selects that compare the
- * raw value of \p src against the field boundaries; a zero input yields
- * integer zero.
- *
- * \param src packed value; must be an i32 (asserted), with the mantissa in
- *            the low \p mant_bits bits and the exponent directly above it
- * \param exp_bits width of the exponent field in bits
- * \param mant_bits width of the mantissa field in bits
- * \return the selected bit pattern passed through ac_to_float()
- */
-static LLVMValueRef
-ac_ufN_to_float(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned exp_bits, unsigned mant_bits)
-{
-       assert(LLVMTypeOf(src) == ctx->i32);
-
-       LLVMValueRef tmp;
-       LLVMValueRef mantissa;
-       mantissa = LLVMBuildAnd(ctx->builder, src, LLVMConstInt(ctx->i32, (1 << mant_bits) - 1, false), "");
-
-       /* Converting normal numbers is just a shift + correcting the exponent bias */
-       unsigned normal_shift = 23 - mant_bits;
-       unsigned bias_shift = 127 - ((1 << (exp_bits - 1)) - 1);
-       LLVMValueRef shifted, normal;
-
-       shifted = LLVMBuildShl(ctx->builder, src, LLVMConstInt(ctx->i32, normal_shift, false), "");
-       normal = LLVMBuildAdd(ctx->builder, shifted, LLVMConstInt(ctx->i32, bias_shift << 23, false), "");
-
-       /* Converting nan/inf numbers is the same, but with a different exponent update */
-       LLVMValueRef naninf;
-       naninf = LLVMBuildOr(ctx->builder, normal, LLVMConstInt(ctx->i32, 0xff << 23, false), "");
-
-       /* Converting denormals is the complex case: determine the leading zeros of the
-        * mantissa to obtain the correct shift for the mantissa and exponent correction.
-        */
-       LLVMValueRef denormal;
-       LLVMValueRef params[2] = {
-               mantissa,
-               ctx->i1true, /* result can be undef when arg is 0; a zero src is
-                             * replaced by the last select at the end of this
-                             * function */
-       };
-       LLVMValueRef ctlz = ac_build_intrinsic(ctx, "llvm.ctlz.i32", ctx->i32,
-                                             params, 2, AC_FUNC_ATTR_READNONE);
-
-       /* Shift such that the leading 1 ends up as the LSB of the exponent field.
-        * A leading one at bit (31 - ctlz) shifted left by (ctlz - 8) lands on
-        * bit 23.
-        */
-       tmp = LLVMBuildSub(ctx->builder, ctlz, LLVMConstInt(ctx->i32, 8, false), "");
-       denormal = LLVMBuildShl(ctx->builder, mantissa, tmp, "");
-
-       unsigned denormal_exp = bias_shift + (32 - mant_bits) - 1;
-       tmp = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, denormal_exp, false), ctlz, "");
-       tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(ctx->i32, 23, false), "");
-       denormal = LLVMBuildAdd(ctx->builder, denormal, tmp, "");
-
-       /* Select the final result.
-        * Later selects override earlier ones, so the effective priority is:
-        *   src == 0                          -> 0
-        *   src <  (1 << mant_bits)           -> denormal
-        *   src >= (all-ones exponent field)  -> naninf
-        *   otherwise                         -> normal
-        */
-       LLVMValueRef result;
-
-       tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
-                           LLVMConstInt(ctx->i32, ((1 << exp_bits) - 1) << mant_bits, false), "");
-       result = LLVMBuildSelect(ctx->builder, tmp, naninf, normal, "");
-
-       tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
-                           LLVMConstInt(ctx->i32, 1 << mant_bits, false), "");
-       result = LLVMBuildSelect(ctx->builder, tmp, result, denormal, "");
-
-       tmp = LLVMBuildICmp(ctx->builder, LLVMIntNE, src, ctx->i32_0, "");
-       result = LLVMBuildSelect(ctx->builder, tmp, result, ctx->i32_0, "");
-
-       return ac_to_float(ctx, result);
-}
-
-/**
- * Generate a fully general open coded buffer format fetch with all required
- * fixups suitable for vertex fetch, using non-format buffer loads.
- *
- * Some combinations of argument values have special interpretations:
- * - size = 8 bytes, format = fixed indicates PIPE_FORMAT_R11G11B10_FLOAT
- * - size = 8 bytes, format != {float,fixed} indicates a 2_10_10_10 data format
- *
- * The function proceeds in stages: pick a load width and count, issue the
- * loads, recombine bytes or split wide loads into per-channel integers,
- * unpack the packed 8-byte special cases, convert every channel according to
- * \p format, pad missing channels (0 below index 3, 1 at index 3) and finally
- * swap channels 0 and 2 when \p reverse is set.
- *
- * \param log_size log(size of channel in bytes)
- * \param num_channels number of channels (1 to 4)
- * \param format AC_FETCH_FORMAT_xxx value
- * \param reverse whether XYZ channels are reversed
- * \param known_aligned whether the source is known to be aligned to hardware's
- *                      effective element size for loading the given format
- *                      (note: this means dword alignment for 8_8_8_8, 16_16, etc.)
- * \param rsrc buffer resource descriptor
- * \param vindex passed to ac_build_buffer_load_common() for every load
- * \param voffset passed to ac_build_buffer_load_common() for every load
- * \param soffset base offset; each load adds its own byte offset to it
- * \param cache_policy passed to ac_build_buffer_load_common() for every load
- * \param can_speculate passed to ac_build_buffer_load_common() for every load
- * \return the resulting vector of floats or integers bitcast to <4 x i32>
- */
-LLVMValueRef
-ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
-                              unsigned log_size,
-                              unsigned num_channels,
-                              unsigned format,
-                              bool reverse,
-                              bool known_aligned,
-                              LLVMValueRef rsrc,
-                              LLVMValueRef vindex,
-                              LLVMValueRef voffset,
-                              LLVMValueRef soffset,
-                              unsigned cache_policy,
-                              bool can_speculate)
-{
-       LLVMValueRef tmp;
-       unsigned load_log_size = log_size;
-       unsigned load_num_channels = num_channels;
-       if (log_size == 3) {
-               load_log_size = 2;
-               if (format == AC_FETCH_FORMAT_FLOAT) {
-                       load_num_channels = 2 * num_channels;
-               } else {
-                       load_num_channels = 1; /* 10_11_11 or 2_10_10_10 */
-               }
-       }
-
-       int log_recombine = 0; /* > 0: merge 2^n byte loads per element; < 0: split each load into 2^-n elements */
-       if (ctx->chip_class == GFX6 && !known_aligned) {
-               /* Avoid alignment restrictions by loading one byte at a time. */
-               load_num_channels <<= load_log_size;
-               log_recombine = load_log_size;
-               load_log_size = 0;
-       } else if (load_num_channels == 2 || load_num_channels == 4) {
-               log_recombine = -util_logbase2(load_num_channels);
-               load_num_channels = 1;
-               load_log_size += -log_recombine;
-       }
-
-       assert(load_log_size >= 2 || LLVM_VERSION_MAJOR >= 9);
-
-       LLVMValueRef loads[32]; /* up to 32 bytes */
-       for (unsigned i = 0; i < load_num_channels; ++i) {
-               tmp = LLVMBuildAdd(ctx->builder, soffset,
-                                  LLVMConstInt(ctx->i32, i << load_log_size, false), "");
-               LLVMTypeRef channel_type = load_log_size == 0 ? ctx->i8 :
-                                          load_log_size == 1 ? ctx->i16 : ctx->i32;
-               unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2); /* NOTE: shadows the num_channels parameter within this loop body */
-               loads[i] = ac_build_buffer_load_common(
-                               ctx, rsrc, vindex, voffset, tmp,
-                               num_channels, channel_type, cache_policy,
-                               can_speculate, false, true);
-               if (load_log_size >= 2)
-                       loads[i] = ac_to_integer(ctx, loads[i]);
-       }
-
-       if (log_recombine > 0) {
-               /* Recombine bytes if necessary (GFX6 only) */
-               LLVMTypeRef dst_type = log_recombine == 2 ? ctx->i32 : ctx->i16;
-
-               for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) {
-                       LLVMValueRef accum = NULL;
-                       for (unsigned i = 0; i < (1 << log_recombine); ++i, ++src) {
-                               tmp = LLVMBuildZExt(ctx->builder, loads[src], dst_type, "");
-                               if (i == 0) {
-                                       accum = tmp;
-                               } else {
-                                       tmp = LLVMBuildShl(ctx->builder, tmp,
-                                                          LLVMConstInt(dst_type, 8 * i, false), "");
-                                       accum = LLVMBuildOr(ctx->builder, accum, tmp, "");
-                               }
-                       }
-                       loads[dst] = accum;
-               }
-       } else if (log_recombine < 0) {
-               /* Split vectors of dwords */
-               if (load_log_size > 2) {
-                       assert(load_num_channels == 1);
-                       LLVMValueRef loaded = loads[0];
-                       unsigned log_split = load_log_size - 2;
-                       log_recombine += log_split;
-                       load_num_channels = 1 << log_split;
-                       load_log_size = 2;
-                       for (unsigned i = 0; i < load_num_channels; ++i) {
-                               tmp = LLVMConstInt(ctx->i32, i, false);
-                               loads[i] = LLVMBuildExtractElement(ctx->builder, loaded, tmp, "");
-                       }
-               }
-
-               /* Further split dwords and shorts if required.
-                * Both loops run from the highest index downwards, so each
-                * source element is read before its slot in loads[] is
-                * overwritten by a destination element.
-                */
-               if (log_recombine < 0) {
-                       for (unsigned src = load_num_channels,
-                                     dst = load_num_channels << -log_recombine;
-                            src > 0; --src) {
-                               unsigned dst_bits = 1 << (3 + load_log_size + log_recombine);
-                               LLVMTypeRef dst_type = LLVMIntTypeInContext(ctx->context, dst_bits);
-                               LLVMValueRef loaded = loads[src - 1];
-                               LLVMTypeRef loaded_type = LLVMTypeOf(loaded);
-                               for (unsigned i = 1 << -log_recombine; i > 0; --i, --dst) {
-                                       tmp = LLVMConstInt(loaded_type, dst_bits * (i - 1), false);
-                                       tmp = LLVMBuildLShr(ctx->builder, loaded, tmp, "");
-                                       loads[dst - 1] = LLVMBuildTrunc(ctx->builder, tmp, dst_type, "");
-                               }
-                       }
-               }
-       }
-
-       if (log_size == 3) {
-               if (format == AC_FETCH_FORMAT_FLOAT) {
-                       for (unsigned i = 0; i < num_channels; ++i) {
-                               tmp = ac_build_gather_values(ctx, &loads[2 * i], 2);
-                               loads[i] = LLVMBuildBitCast(ctx->builder, tmp, ctx->f64, "");
-                       }
-               } else if (format == AC_FETCH_FORMAT_FIXED) {
-                       /* 10_11_11_FLOAT: bits 0..10 and 11..21 are 11-bit fields,
-                        * bits 22..31 a 10-bit field; afterwards the data is
-                        * treated as three 32-bit float channels. */
-                       LLVMValueRef data = loads[0];
-                       LLVMValueRef i32_2047 = LLVMConstInt(ctx->i32, 2047, false);
-                       LLVMValueRef r = LLVMBuildAnd(ctx->builder, data, i32_2047, "");
-                       tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 11, false), "");
-                       LLVMValueRef g = LLVMBuildAnd(ctx->builder, tmp, i32_2047, "");
-                       LLVMValueRef b = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 22, false), "");
-
-                       loads[0] = ac_to_integer(ctx, ac_ufN_to_float(ctx, r, 5, 6));
-                       loads[1] = ac_to_integer(ctx, ac_ufN_to_float(ctx, g, 5, 6));
-                       loads[2] = ac_to_integer(ctx, ac_ufN_to_float(ctx, b, 5, 5));
-
-                       num_channels = 3;
-                       log_size = 2;
-                       format = AC_FETCH_FORMAT_FLOAT;
-               } else {
-                       /* 2_10_10_10 data formats: three i10 channels from bits
-                        * 0, 10 and 20, and one i2 channel from bit 30. */
-                       LLVMValueRef data = loads[0];
-                       LLVMTypeRef i10 = LLVMIntTypeInContext(ctx->context, 10);
-                       LLVMTypeRef i2 = LLVMIntTypeInContext(ctx->context, 2);
-                       loads[0] = LLVMBuildTrunc(ctx->builder, data, i10, "");
-                       tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 10, false), "");
-                       loads[1] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
-                       tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 20, false), "");
-                       loads[2] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
-                       tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 30, false), "");
-                       loads[3] = LLVMBuildTrunc(ctx->builder, tmp, i2, "");
-
-                       num_channels = 4;
-               }
-       }
-
-       if (format == AC_FETCH_FORMAT_FLOAT) {
-               if (log_size != 2) {
-                       for (unsigned chan = 0; chan < num_channels; ++chan) {
-                               tmp = ac_to_float(ctx, loads[chan]);
-                               if (log_size == 3)
-                                       tmp = LLVMBuildFPTrunc(ctx->builder, tmp, ctx->f32, "");
-                               else if (log_size == 1)
-                                       tmp = LLVMBuildFPExt(ctx->builder, tmp, ctx->f32, "");
-                               loads[chan] = ac_to_integer(ctx, tmp);
-                       }
-               }
-       } else if (format == AC_FETCH_FORMAT_UINT) {
-               if (log_size != 2) {
-                       for (unsigned chan = 0; chan < num_channels; ++chan)
-                               loads[chan] = LLVMBuildZExt(ctx->builder, loads[chan], ctx->i32, "");
-               }
-       } else if (format == AC_FETCH_FORMAT_SINT) {
-               if (log_size != 2) {
-                       for (unsigned chan = 0; chan < num_channels; ++chan)
-                               loads[chan] = LLVMBuildSExt(ctx->builder, loads[chan], ctx->i32, "");
-               }
-       } else {
-               bool unsign = format == AC_FETCH_FORMAT_UNORM ||
-                             format == AC_FETCH_FORMAT_USCALED ||
-                             format == AC_FETCH_FORMAT_UINT; /* UINT is consumed by an earlier branch, so this term is never true here */
-
-               for (unsigned chan = 0; chan < num_channels; ++chan) {
-                       if (unsign) {
-                               tmp = LLVMBuildUIToFP(ctx->builder, loads[chan], ctx->f32, "");
-                       } else {
-                               tmp = LLVMBuildSIToFP(ctx->builder, loads[chan], ctx->f32, "");
-                       }
-
-                       LLVMValueRef scale = NULL; /* stays NULL for formats that need no scaling */
-                       if (format == AC_FETCH_FORMAT_FIXED) {
-                               assert(log_size == 2);
-                               scale = LLVMConstReal(ctx->f32, 1.0 / 0x10000);
-                       } else if (format == AC_FETCH_FORMAT_UNORM) {
-                               unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
-                               scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << bits) - 1));
-                       } else if (format == AC_FETCH_FORMAT_SNORM) {
-                               unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
-                               scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << (bits - 1)) - 1));
-                       }
-                       if (scale)
-                               tmp = LLVMBuildFMul(ctx->builder, tmp, scale, "");
-
-                       if (format == AC_FETCH_FORMAT_SNORM) {
-                               /* Clamp to [-1, 1]; only the lower bound is enforced
-                                * here, by selecting -1.0 when the value compares
-                                * below it. */
-                               LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
-                               LLVMValueRef clamp =
-                                       LLVMBuildFCmp(ctx->builder, LLVMRealULT, tmp, neg_one, "");
-                               tmp = LLVMBuildSelect(ctx->builder, clamp, neg_one, tmp, "");
-                       }
-
-                       loads[chan] = ac_to_integer(ctx, tmp);
-               }
-       }
-
-       while (num_channels < 4) { /* pad missing channels: index 3 gets one, lower indices get zero */
-               if (format == AC_FETCH_FORMAT_UINT || format == AC_FETCH_FORMAT_SINT) {
-                       loads[num_channels] = num_channels == 3 ? ctx->i32_1 : ctx->i32_0;
-               } else {
-                       loads[num_channels] = ac_to_integer(ctx, num_channels == 3 ? ctx->f32_1 : ctx->f32_0);
-               }
-               num_channels++;
-       }
-
-       if (reverse) { /* swap channels 0 and 2 */
-               tmp = loads[0];
-               loads[0] = loads[2];
-               loads[2] = tmp;
-       }
-
-       return ac_build_gather_values(ctx, loads, 4);
-}
-
-static void
-ac_build_tbuffer_store(struct ac_llvm_context *ctx,
-                      LLVMValueRef rsrc,
-                      LLVMValueRef vdata,
-                      LLVMValueRef vindex,
-                      LLVMValueRef voffset,
-                      LLVMValueRef soffset,
-                      LLVMValueRef immoffset,
-                      unsigned num_channels,
-                      unsigned dfmt,
-                      unsigned nfmt,
-                      unsigned cache_policy,
-                      bool structurized)
-{      /* Emit a call to llvm.amdgcn.{struct,raw}.tbuffer.store.<type>.
-        *
-        * immoffset is added to voffset first (a NULL voffset counts as 0).
-        * The argument list is: vdata, rsrc bitcast to v4i32, vindex (only
-        * when structurized), voffset, soffset, the combined format value
-        * from ac_get_tbuffer_format(), and cache_policy.  NULL vindex and
-        * soffset are replaced by the i32 constant 0.
-        *
-        * The intrinsic's type suffix is derived from an i32 vector of
-        * num_channels elements (plain i32 for one channel); a 3-channel
-        * store uses the 4-element type when ac_has_vec3_support() reports
-        * no vec3 support for the chip.
-        */
-       voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0,
-                              immoffset, "");
-
-       LLVMValueRef args[7];
-       int idx = 0;
-       args[idx++] = vdata;
-       args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
-       if (structurized)
-               args[idx++] = vindex ? vindex : ctx->i32_0;
-       args[idx++] = voffset ? voffset : ctx->i32_0; /* voffset was reassigned from LLVMBuildAdd above, so presumably never NULL here */
-       args[idx++] = soffset ? soffset : ctx->i32_0;
-       args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
-       args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
-       unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
-       const char *indexing_kind = structurized ? "struct" : "raw";
-       char name[256], type_name[8];
-
-       LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
-       ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
-       snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s",
-                indexing_kind, type_name);
-
-       ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
-                          AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
-}
-
-void
-ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
-                             LLVMValueRef rsrc,
-                             LLVMValueRef vdata,
-                             LLVMValueRef vindex,
-                             LLVMValueRef voffset,
-                             LLVMValueRef soffset,
-                             LLVMValueRef immoffset,
-                             unsigned num_channels,
-                             unsigned dfmt,
-                             unsigned nfmt,
-                             unsigned cache_policy)
-{      /* Typed buffer store using the "struct" intrinsic: forwards every
-        * argument to ac_build_tbuffer_store() with structurized = true, so
-        * vindex becomes part of the intrinsic's argument list.
-        */
-       ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset,
-                              immoffset, num_channels, dfmt, nfmt, cache_policy,
-                              true);
-}
-
-void
-ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
-                          LLVMValueRef rsrc,
-                          LLVMValueRef vdata,
-                          LLVMValueRef voffset,
-                          LLVMValueRef soffset,
-                          LLVMValueRef immoffset,
-                          unsigned num_channels,
-                          unsigned dfmt,
-                          unsigned nfmt,
-                          unsigned cache_policy)
-{      /* Typed buffer store using the "raw" intrinsic: forwards every
-        * argument to ac_build_tbuffer_store() with a NULL vindex and
-        * structurized = false, so no vindex argument is emitted.
-        */
-       ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset,
-                              immoffset, num_channels, dfmt, nfmt, cache_policy,
-                              false);
-}
-
-void
-ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
-                            LLVMValueRef rsrc,
-                            LLVMValueRef vdata,
-                            LLVMValueRef voffset,
-                            LLVMValueRef soffset,
-                            unsigned cache_policy)
-{      /* Store one 16-bit value to a buffer.
-        *
-        * vdata is first bitcast to i16.  With LLVM 9 or newer it is handed
-        * to ac_build_buffer_store_common() as a single ctx->i16 channel
-        * (NULL vindex).  With older LLVM it is zero-extended to i32 and
-        * written through a one-channel raw typed-buffer store using the
-        * 16-bit data format, the UINT number format and an immediate offset
-        * of 0.
-        */
-       vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
-
-       if (LLVM_VERSION_MAJOR >= 9) {
-               /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
-               ac_build_buffer_store_common(ctx, rsrc, vdata, NULL,
-                                            voffset, soffset, 1,
-                                            ctx->i16, cache_policy,
-                                            false, false);
-       } else {
-               unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
-               unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
-               vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
-
-               ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
-                                          ctx->i32_0, 1, dfmt, nfmt, cache_policy);
-       }
-}
-
-void
-ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
-                           LLVMValueRef rsrc,
-                           LLVMValueRef vdata,
-                           LLVMValueRef voffset,
-                           LLVMValueRef soffset,
-                           unsigned cache_policy)
-{      /* Store one 8-bit value to a buffer.
-        *
-        * vdata is first bitcast to i8.  With LLVM 9 or newer it is handed
-        * to ac_build_buffer_store_common() as a single ctx->i8 channel
-        * (NULL vindex).  With older LLVM it is zero-extended to i32 and
-        * written through a one-channel raw typed-buffer store using the
-        * 8-bit data format, the UINT number format and an immediate offset
-        * of 0.
-        */
-       vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
-
-       if (LLVM_VERSION_MAJOR >= 9) {
-               /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
-               ac_build_buffer_store_common(ctx, rsrc, vdata, NULL,
-                                            voffset, soffset, 1,
-                                            ctx->i8, cache_policy,
-                                            false, false);
-       } else {
-               unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
-               unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
-               vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
-
-               ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
-                                          ctx->i32_0, 1, dfmt, nfmt, cache_policy);
-       }
-}
-/**
- * Set range metadata on an instruction.  This can only be used on load and
- * call instructions.  If you know an instruction can only produce the values
- * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
- * \p lo is the minimum value inclusive.
- * \p hi is the maximum value exclusive.
- *
- * The two bounds are materialized as constants of \p value's own type and
- * attached as a two-element metadata node under ctx->range_md_kind.
- *
- * \param ctx supplies the metadata kind id (range_md_kind)
- * \param value instruction to annotate
- * \param lo lower bound, inclusive
- * \param hi upper bound, exclusive
- */
-static void set_range_metadata(struct ac_llvm_context *ctx,
-                              LLVMValueRef value, unsigned lo, unsigned hi)
-{
-       LLVMValueRef range_md, md_args[2];
-       LLVMTypeRef type = LLVMTypeOf(value);
-       LLVMContextRef context = LLVMGetTypeContext(type);
-
-       md_args[0] = LLVMConstInt(type, lo, false);
-       md_args[1] = LLVMConstInt(type, hi, false);
-       range_md = LLVMMDNodeInContext(context, md_args, 2);
-       LLVMSetMetadata(value, ctx->range_md_kind, range_md);
-}
-
-LLVMValueRef
-ac_get_thread_id(struct ac_llvm_context *ctx)
-{      /* Build the current thread's index within its wave.
-        *
-        * llvm.amdgcn.mbcnt.lo is called with mask 0xffffffff and a base of 0.
-        * For wave_size 32 that result is the id; otherwise it is fed as the
-        * base into llvm.amdgcn.mbcnt.hi (same mask) and that result is the
-        * id.  The returned call is tagged with range metadata
-        * [0, wave_size).
-        */
-       LLVMValueRef tid;
-
-       LLVMValueRef tid_args[2];
-       tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
-       tid_args[1] = ctx->i32_0;
-       tid_args[1] = ac_build_intrinsic(ctx, /* slot 1 is reused: the mbcnt.lo result becomes the base for mbcnt.hi */
-                                        "llvm.amdgcn.mbcnt.lo", ctx->i32,
-                                        tid_args, 2, AC_FUNC_ATTR_READNONE);
-
-       if (ctx->wave_size == 32) {
-               tid = tid_args[1];
-       } else {
-               tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
-                                        ctx->i32, tid_args,
-                                        2, AC_FUNC_ATTR_READNONE);
-       }
-       set_range_metadata(ctx, tid, 0, ctx->wave_size);
-       return tid;
-}
-
-/*
- * AMD GCN implements derivatives using the local data store (LDS)
- * All writes to the LDS happen in all executing threads at
- * the same time. TID is the Thread ID for the current
- * thread and is a value between 0 and 63, representing
- * the thread's position in the wavefront.
- *
- * For the pixel shader threads are grouped into quads of four pixels.
- * The TIDs of the pixels of a quad are:
- *
- *  +------+------+
- *  |4n + 0|4n + 1|
- *  +------+------+
- *  |4n + 2|4n + 3|
- *  +------+------+
- *
- * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
- * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
- * the current pixel's column, and masking with 0xfffffffe yields the TID
- * of the left pixel of the current pixel's row.
- *
- * Adding 1 yields the TID of the pixel to the right of the left pixel, and
- * adding 2 yields the TID of the pixel below the top pixel.
- *
- * NOTE(review): the code below does not access the LDS itself; it fetches
- * the two lanes' values through ac_build_quad_swizzle().
- *
- * \param mask ANDed with each lane index within the quad (0..3) to select
- *             the reference ("tl") lane
- * \param idx added to the reference lane index to select the other ("trbl")
- *            lane
- * \param val value to differentiate; when its float type is f16 it is
- *            zero-extended to i32 for the swizzles and truncated back to
- *            i16 afterwards
- * \return trbl - tl in the type given by ac_to_float_type(), wrapped in a
- *         call to llvm.amdgcn.wqm.<type>
- */
-LLVMValueRef
-ac_build_ddxy(struct ac_llvm_context *ctx,
-             uint32_t mask,
-             int idx,
-             LLVMValueRef val)
-{
-       unsigned tl_lanes[4], trbl_lanes[4];
-       char name[32], type[8];
-       LLVMValueRef tl, trbl;
-       LLVMTypeRef result_type;
-       LLVMValueRef result;
-
-       result_type = ac_to_float_type(ctx, LLVMTypeOf(val));
-
-       if (result_type == ctx->f16)
-               val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
-
-       for (unsigned i = 0; i < 4; ++i) {
-               tl_lanes[i] = i & mask;
-               trbl_lanes[i] = (i & mask) + idx;
-       }
-
-       tl = ac_build_quad_swizzle(ctx, val,
-                                  tl_lanes[0], tl_lanes[1],
-                                  tl_lanes[2], tl_lanes[3]);
-       trbl = ac_build_quad_swizzle(ctx, val,
-                                    trbl_lanes[0], trbl_lanes[1],
-                                    trbl_lanes[2], trbl_lanes[3]);
-
-       if (result_type == ctx->f16) {
-               tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, "");
-               trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, "");
-       }
-
-       tl = LLVMBuildBitCast(ctx->builder, tl, result_type, "");
-       trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, "");
-       result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
-
-       ac_build_type_name_for_intr(result_type, type, sizeof(type));
-       snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type);
-
-       return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0);
-}
-
-void
-ac_build_sendmsg(struct ac_llvm_context *ctx,
-                uint32_t msg,
-                LLVMValueRef wave_id)
-{      /* Emit a call to llvm.amdgcn.s.sendmsg with two arguments: msg as
-        * an i32 constant, followed by wave_id.  No function attributes are
-        * set on the call.
-        */
-       LLVMValueRef args[2];
-       args[0] = LLVMConstInt(ctx->i32, msg, false);
-       args[1] = wave_id;
-       ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0);
-}
-
-LLVMValueRef
-ac_build_imsb(struct ac_llvm_context *ctx,
-             LLVMValueRef arg,
-             LLVMTypeRef dst_type)
-{      /* Signed "find most significant bit", built on
-        * llvm.amdgcn.sffbh.i32: returns 31 minus the intrinsic's result, or
-        * -1 when arg is 0 or -1 (all ones).
-        * NOTE(review): dst_type is used as the intrinsic's return type while
-        * the arithmetic that follows uses i32 constants - presumably callers
-        * always pass i32; confirm.
-        */
-       LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32",
-                                             dst_type, &arg, 1,
-                                             AC_FUNC_ATTR_READNONE);
-
-       /* The HW returns the last bit index from MSB, but NIR/TGSI wants
-        * the index from LSB. Invert it by doing "31 - msb". */
-       msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
-                          msb, "");
-
-       LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
-       LLVMValueRef cond = LLVMBuildOr(ctx->builder,
-                                       LLVMBuildICmp(ctx->builder, LLVMIntEQ,
-                                                     arg, ctx->i32_0, ""),
-                                       LLVMBuildICmp(ctx->builder, LLVMIntEQ,
-                                                     arg, all_ones, ""), "");
-
-       return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
-}
-
-LLVMValueRef
-ac_build_umsb(struct ac_llvm_context *ctx,
-             LLVMValueRef arg,
-             LLVMTypeRef dst_type)
-{      /* Unsigned "find most significant bit" for 8-, 16-, 32- and 64-bit
-        * arguments, built on llvm.ctlz.  The result is always an i32:
-        * (bitsize - 1) - ctlz(arg), or -1 when arg is zero.
-        * dst_type is not referenced by this function.
-        */
-       const char *intrin_name;
-       LLVMTypeRef type;
-       LLVMValueRef highest_bit;
-       LLVMValueRef zero;
-       unsigned bitsize;
-
-       bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
-       switch (bitsize) {
-       case 64:
-               intrin_name = "llvm.ctlz.i64";
-               type = ctx->i64;
-               highest_bit = LLVMConstInt(ctx->i64, 63, false);
-               zero = ctx->i64_0;
-               break;
-       case 32:
-               intrin_name = "llvm.ctlz.i32";
-               type = ctx->i32;
-               highest_bit = LLVMConstInt(ctx->i32, 31, false);
-               zero = ctx->i32_0;
-               break;
-       case 16:
-               intrin_name = "llvm.ctlz.i16";
-               type = ctx->i16;
-               highest_bit = LLVMConstInt(ctx->i16, 15, false);
-               zero = ctx->i16_0;
-               break;
-       case 8:
-               intrin_name = "llvm.ctlz.i8";
-               type = ctx->i8;
-               highest_bit = LLVMConstInt(ctx->i8, 7, false);
-               zero = ctx->i8_0;
-               break;
-       default:
-               unreachable(!"invalid bitsize"); /* NOTE(review): the argument is !"..." (value 0), not the string itself - confirm this matches what unreachable() expects */
-               break;
-       }
-
-       LLVMValueRef params[2] = {
-               arg,
-               ctx->i1true, /* second ctlz operand set to true; the arg == 0 case is replaced by the final select */
-       };
-
-       LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type,
-                                             params, 2,
-                                             AC_FUNC_ATTR_READNONE);
-
-       /* The HW returns the last bit index from MSB, but TGSI/NIR wants
-        * the index from LSB. Invert it by doing "highest_bit - msb", where
-        * highest_bit is bitsize - 1 (31 for 32-bit input). */
-       msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
-
-       if (bitsize == 64) {
-               msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, "");
-       } else if (bitsize < 32) {
-               msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, "");
-       }
-
-       /* check for zero */
-       return LLVMBuildSelect(ctx->builder,
-                              LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""),
-                              LLVMConstInt(ctx->i32, -1, true), msb, "");
-}
-
-LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
-                          LLVMValueRef b)
-{      /* Floating-point minimum via llvm.minnum.f<bits>, where <bits> is
-        * ac_get_elem_bits() of a's type; the call returns a's type.
-        */
-       char name[64];
-       snprintf(name, sizeof(name), "llvm.minnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a)));
-       LLVMValueRef args[2] = {a, b};
-       return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2,
-                                 AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
-                          LLVMValueRef b)
-{      /* Floating-point maximum via llvm.maxnum.f<bits>, where <bits> is
-        * ac_get_elem_bits() of a's type; the call returns a's type.
-        */
-       char name[64];
-       snprintf(name, sizeof(name), "llvm.maxnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a)));
-       LLVMValueRef args[2] = {a, b};
-       return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2,
-                                 AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
-                          LLVMValueRef b)
-{      /* Signed integer minimum: select a when a <= b (signed), else b. */
-       LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, "");
-       return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
-}
-
-LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
-                          LLVMValueRef b)
-{      /* Signed integer maximum: select a when a > b (signed), else b. */
-       LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, "");
-       return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
-}
-
-/* Unsigned integer minimum: selects a when a <= b (unsigned compare), else b. */
-LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
-                          LLVMValueRef b)
-{
-       LLVMValueRef a_is_min =
-               LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
-
-       return LLVMBuildSelect(ctx->builder, a_is_min, a, b, "");
-}
-
-/* Unsigned integer maximum: selects a when a >= b (unsigned compare), else b. */
-LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a,
-                          LLVMValueRef b)
-{
-       LLVMValueRef a_is_max =
-               LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, "");
-
-       return LLVMBuildSelect(ctx->builder, a_is_max, a, b, "");
-}
-
-/* Clamp a floating-point value to [0.0, 1.0] as fmin(fmax(value, 0.0), 1.0).
- * The constants are created in the type of value.
- */
-LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
-{
-       LLVMTypeRef type = LLVMTypeOf(value);
-       LLVMValueRef zero = LLVMConstReal(type, 0.0);
-       LLVMValueRef one = LLVMConstReal(type, 1.0);
-       LLVMValueRef at_least_zero = ac_build_fmax(ctx, value, zero);
-
-       return ac_build_fmin(ctx, at_least_zero, one);
-}
-
-/* Emit the export described by *a.
- *
- * Operands are, in order: target, enabled channel mask, the output values,
- * the done bit and the valid-mask bit. A compressed export (a->compr)
- * bitcasts out[0] and out[1] to v2i16 and calls llvm.amdgcn.exp.compr.v2i16;
- * otherwise all four out[] values are passed unchanged to llvm.amdgcn.exp.f32.
- */
-void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
-{
-       LLVMValueRef operands[8];
-       unsigned count = 0;
-
-       operands[count++] = LLVMConstInt(ctx->i32, a->target, 0);
-       operands[count++] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
-
-       if (!a->compr) {
-               for (unsigned chan = 0; chan < 4; chan++)
-                       operands[count++] = a->out[chan];
-       } else {
-               LLVMTypeRef v2i16 =
-                       LLVMVectorType(LLVMInt16TypeInContext(ctx->context), 2);
-
-               for (unsigned half = 0; half < 2; half++) {
-                       operands[count++] =
-                               LLVMBuildBitCast(ctx->builder, a->out[half],
-                                                v2i16, "");
-               }
-       }
-
-       operands[count++] = LLVMConstInt(ctx->i1, a->done, 0);
-       operands[count++] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
-
-       ac_build_intrinsic(ctx,
-                          a->compr ? "llvm.amdgcn.exp.compr.v2i16"
-                                   : "llvm.amdgcn.exp.f32",
-                          ctx->voidt, operands, count, 0);
-}
-
-/* Emit an uncompressed export to V_008DFC_SQ_EXP_NULL with no channels
- * enabled, the DONE bit and the valid mask set, and undef f32 in all four
- * output slots.
- */
-void ac_build_export_null(struct ac_llvm_context *ctx)
-{
-       LLVMValueRef undef = LLVMGetUndef(ctx->f32);
-       struct ac_export_args args = {
-               .enabled_channels = 0x0,        /* enabled channels */
-               .valid_mask = 1,                /* whether the EXEC mask is valid */
-               .done = 1,                      /* DONE bit */
-               .target = V_008DFC_SQ_EXP_NULL,
-               .compr = 0,                     /* COMPR flag (0 = 32-bit export) */
-               .out = { undef, undef, undef, undef }, /* R, G, B, A */
-       };
-
-       ac_build_export(ctx, &args);
-}
-
-/* Number of coordinate operands an image intrinsic takes for dim. */
-static unsigned ac_num_coords(enum ac_image_dim dim)
-{
-       unsigned count;
-
-       switch (dim) {
-       case ac_image_1d:
-               count = 1;
-               break;
-       case ac_image_2d:
-       case ac_image_1darray:
-               count = 2;
-               break;
-       case ac_image_3d:
-       case ac_image_cube:
-       case ac_image_2darray:
-       case ac_image_2dmsaa:
-               count = 3;
-               break;
-       case ac_image_2darraymsaa:
-               count = 4;
-               break;
-       default:
-               unreachable("ac_num_coords: bad dim");
-       }
-
-       return count;
-}
-
-/* Number of derivative operands an image intrinsic takes for dim.
- * The MSAA dimensions are not supported and hit unreachable().
- */
-static unsigned ac_num_derivs(enum ac_image_dim dim)
-{
-       unsigned count;
-
-       switch (dim) {
-       case ac_image_1d:
-       case ac_image_1darray:
-               count = 2;
-               break;
-       case ac_image_2d:
-       case ac_image_2darray:
-       case ac_image_cube:
-               count = 4;
-               break;
-       case ac_image_3d:
-               count = 6;
-               break;
-       case ac_image_2dmsaa:
-       case ac_image_2darraymsaa:
-       default:
-               unreachable("derivatives not supported");
-       }
-
-       return count;
-}
-
-/* Map an atomic opcode to the sub-operation string that is appended to
- * "atomic." when ac_build_image_opcode() assembles the intrinsic name.
- */
-static const char *get_atomic_name(enum ac_atomic_op op)
-{
-       switch (op) {
-       case ac_atomic_swap:
-               return "swap";
-       case ac_atomic_add:
-               return "add";
-       case ac_atomic_sub:
-               return "sub";
-       case ac_atomic_smin:
-               return "smin";
-       case ac_atomic_umin:
-               return "umin";
-       case ac_atomic_smax:
-               return "smax";
-       case ac_atomic_umax:
-               return "umax";
-       case ac_atomic_and:
-               return "and";
-       case ac_atomic_or:
-               return "or";
-       case ac_atomic_xor:
-               return "xor";
-       case ac_atomic_inc_wrap:
-               return "inc";
-       case ac_atomic_dec_wrap:
-               return "dec";
-       }
-
-       /* No case matched. */
-       unreachable("bad atomic op");
-}
-
-/* Build a call to the llvm.amdgcn.image.* intrinsic described by *a.
- *
- * Both the operand list and the intrinsic name are assembled from the fields
- * of *a: the opcode, the dimension, the optional offset/bias/compare/derivs/
- * lod modifiers, the coordinates, the resource and (for sample-type opcodes)
- * the sampler. Returns the intrinsic's result; a v4f32 result of a
- * non-sample opcode is bitcast to v4i32 before it is returned.
- */
-LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
-                                  struct ac_image_args *a)
-{
-       const char *overload[3] = { "", "", "" };
-       unsigned num_overloads = 0;
-       LLVMValueRef args[18];
-       unsigned num_args = 0;
-       enum ac_image_dim dim = a->dim;
-
-       /* Check that the requested modifiers are a valid combination for the
-        * opcode; in particular at most one of bias, lod, level_zero and
-        * derivs may be given.
-        */
-       assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
-              !a->level_zero);
-       assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
-               a->opcode != ac_image_store_mip) ||
-              a->lod);
-       assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
-              (!a->compare && !a->offset));
-       assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
-               a->opcode == ac_image_get_lod) ||
-              !a->bias);
-       assert((a->bias ? 1 : 0) +
-              (a->lod ? 1 : 0) +
-              (a->level_zero ? 1 : 0) +
-              (a->derivs[0] ? 1 : 0) <= 1);
-
-       /* getlod is emitted with the non-array, non-cube form of the
-        * dimension.
-        */
-       if (a->opcode == ac_image_get_lod) {
-               switch (dim) {
-               case ac_image_1darray:
-                       dim = ac_image_1d;
-                       break;
-               case ac_image_2darray:
-               case ac_image_cube:
-                       dim = ac_image_2d;
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       bool sample = a->opcode == ac_image_sample ||
-                     a->opcode == ac_image_gather4 ||
-                     a->opcode == ac_image_get_lod;
-       bool atomic = a->opcode == ac_image_atomic ||
-                     a->opcode == ac_image_atomic_cmpswap;
-       bool load = a->opcode == ac_image_sample ||
-                   a->opcode == ac_image_gather4 ||
-                   a->opcode == ac_image_load ||
-                   a->opcode == ac_image_load_mip;
-       /* Sample-type opcodes take float coordinates, all others integers. */
-       LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32;
-
-       /* Atomics and stores pass their data operand(s) first. */
-       if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
-               args[num_args++] = a->data[0];
-               if (a->opcode == ac_image_atomic_cmpswap)
-                       args[num_args++] = a->data[1];
-       }
-
-       if (!atomic)
-               args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, false);
-
-       if (a->offset)
-               args[num_args++] = ac_to_integer(ctx, a->offset);
-       if (a->bias) {
-               args[num_args++] = ac_to_float(ctx, a->bias);
-               overload[num_overloads++] = ".f32";
-       }
-       if (a->compare)
-               args[num_args++] = ac_to_float(ctx, a->compare);
-       if (a->derivs[0]) {
-               unsigned count = ac_num_derivs(dim);
-               for (unsigned i = 0; i < count; ++i)
-                       args[num_args++] = ac_to_float(ctx, a->derivs[i]);
-               overload[num_overloads++] = ".f32";
-       }
-       /* getresinfo takes no coordinates, only the lod. */
-       unsigned num_coords =
-               a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0;
-       for (unsigned i = 0; i < num_coords; ++i)
-               args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
-       if (a->lod)
-               args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
-       /* The last overload suffix names the coordinate type. */
-       overload[num_overloads++] = sample ? ".f32" : ".i32";
-
-       args[num_args++] = a->resource;
-       if (sample) {
-               args[num_args++] = a->sampler;
-               args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false);
-       }
-
-       args[num_args++] = ctx->i32_0; /* texfailctrl */
-       /* Loads route the policy through get_load_cache_policy(); every
-        * other opcode passes a->cache_policy unchanged.
-        */
-       args[num_args++] = LLVMConstInt(ctx->i32,
-                                       load ? get_load_cache_policy(ctx, a->cache_policy) :
-                                              a->cache_policy, false);
-
-       /* Base name of the intrinsic, plus the atomic sub-operation. */
-       const char *name;
-       const char *atomic_subop = "";
-       switch (a->opcode) {
-       case ac_image_sample: name = "sample"; break;
-       case ac_image_gather4: name = "gather4"; break;
-       case ac_image_load: name = "load"; break;
-       case ac_image_load_mip: name = "load.mip"; break;
-       case ac_image_store: name = "store"; break;
-       case ac_image_store_mip: name = "store.mip"; break;
-       case ac_image_atomic:
-               name = "atomic.";
-               atomic_subop = get_atomic_name(a->atomic);
-               break;
-       case ac_image_atomic_cmpswap:
-               name = "atomic.";
-               atomic_subop = "cmpswap";
-               break;
-       case ac_image_get_lod: name = "getlod"; break;
-       case ac_image_get_resinfo: name = "getresinfo"; break;
-       default: unreachable("invalid image opcode");
-       }
-
-       const char *dimname;
-       switch (dim) {
-       case ac_image_1d: dimname = "1d"; break;
-       case ac_image_2d: dimname = "2d"; break;
-       case ac_image_3d: dimname = "3d"; break;
-       case ac_image_cube: dimname = "cube"; break;
-       case ac_image_1darray: dimname = "1darray"; break;
-       case ac_image_2darray: dimname = "2darray"; break;
-       case ac_image_2dmsaa: dimname = "2dmsaa"; break;
-       case ac_image_2darraymsaa: dimname = "2darraymsaa"; break;
-       default: unreachable("invalid dim");
-       }
-
-       /* Only sample and gather4 spell an explicit lod as the ".l" modifier. */
-       bool lod_suffix =
-               a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
-       char intr_name[96];
-       snprintf(intr_name, sizeof(intr_name),
-                "llvm.amdgcn.image.%s%s" /* base name */
-                "%s%s%s" /* sample/gather modifiers */
-                ".%s.%s%s%s%s", /* dimension and type overloads */
-                name, atomic_subop,
-                a->compare ? ".c" : "",
-                a->bias ? ".b" :
-                lod_suffix ? ".l" :
-                a->derivs[0] ? ".d" :
-                a->level_zero ? ".lz" : "",
-                a->offset ? ".o" : "",
-                dimname,
-                atomic ? "i32" : "v4f32",
-                overload[0], overload[1], overload[2]);
-
-       /* Atomics return i32, stores return nothing, the rest v4f32. */
-       LLVMTypeRef retty;
-       if (atomic)
-               retty = ctx->i32;
-       else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
-               retty = ctx->voidt;
-       else
-               retty = ctx->v4f32;
-
-       LLVMValueRef result =
-               ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
-                                  a->attributes);
-       /* Non-sample opcodes hand their v4f32 result back as v4i32. */
-       if (!sample && retty == ctx->v4f32) {
-               result = LLVMBuildBitCast(ctx->builder, result,
-                                         ctx->v4i32, "");
-       }
-       return result;
-}
-
-/* Return the sample count encoded in the image descriptor rsrc.
- *
- * The hardware has no instruction for this, so the value is decoded from
- * the descriptor directly: element 3 is shifted right by 16, masked with
- * 0xf, and the result is used as the shift amount of 1 << n.
- */
-LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
-                                            LLVMValueRef rsrc)
-{
-       LLVMBuilderRef b = ctx->builder;
-       LLVMValueRef elem3 =
-               LLVMBuildExtractElement(b, rsrc, LLVMConstInt(ctx->i32, 3, 0), "");
-       LLVMValueRef shifted =
-               LLVMBuildLShr(b, elem3, LLVMConstInt(ctx->i32, 16, 0), "");
-       LLVMValueRef log2_samples =
-               LLVMBuildAnd(b, shifted, LLVMConstInt(ctx->i32, 0xf, 0), "");
-
-       return LLVMBuildShl(b, ctx->i32_1, log2_samples, "");
-}
-
-/* Call llvm.amdgcn.cvt.pkrtz on the two values in args[] and return the
- * resulting v2f16.
- */
-LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
-                                   LLVMValueRef args[2])
-{
-       LLVMTypeRef f16 = LLVMHalfTypeInContext(ctx->context);
-       LLVMTypeRef v2f16 = LLVMVectorType(f16, 2);
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", v2f16,
-                                 args, 2, AC_FUNC_ATTR_READNONE);
-}
-
-/* Call llvm.amdgcn.cvt.pknorm.i16 on the two values in args[] and return
- * the v2i16 result bitcast to a single i32.
- */
-LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
-                                    LLVMValueRef args[2])
-{
-       LLVMValueRef packed;
-
-       packed = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
-                                   ctx->v2i16, args, 2,
-                                   AC_FUNC_ATTR_READNONE);
-       return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
-}
-
-/* Call llvm.amdgcn.cvt.pknorm.u16 on the two values in args[] and return
- * the v2i16 result bitcast to a single i32.
- */
-LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
-                                    LLVMValueRef args[2])
-{
-       LLVMValueRef packed;
-
-       packed = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
-                                   ctx->v2i16, args, 2,
-                                   AC_FUNC_ATTR_READNONE);
-       return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
-}
-
-/* Pack the two signed integers in args[] with llvm.amdgcn.cvt.pk.i16 and
- * return the v2i16 result bitcast to i32.
- *
- * bits must be 8, 10 or 16. The 8-bit and 10-bit clamping is for HW
- * workarounds: for those widths each input is first clamped to the signed
- * range of that width. With bits == 10 and hi set, args[1] is treated as
- * alpha and clamped to [-2, 1] instead. The clamped values are written back
- * into the caller's args[].
- */
-LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
-                                LLVMValueRef args[2], unsigned bits, bool hi)
-{
-       assert(bits == 8 || bits == 10 || bits == 16);
-
-       LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
-               bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
-       /* The lower bounds are negative, so ask for sign extension like the
-        * other negative constants in this file do.
-        */
-       LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
-               bits == 8 ? -128 : bits == 10 ? -512 : -32768, true);
-       LLVMValueRef max_alpha =
-               bits != 10 ? max_rgb : ctx->i32_1;
-       LLVMValueRef min_alpha =
-               bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, true);
-
-       /* Clamp. */
-       if (bits != 16) {
-               for (int i = 0; i < 2; i++) {
-                       /* Only the second value of the high pair is alpha. */
-                       bool alpha = hi && i == 1;
-                       args[i] = ac_build_imin(ctx, args[i],
-                                               alpha ? max_alpha : max_rgb);
-                       args[i] = ac_build_imax(ctx, args[i],
-                                               alpha ? min_alpha : min_rgb);
-               }
-       }
-
-       LLVMValueRef res =
-               ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
-                                  ctx->v2i16, args, 2,
-                                  AC_FUNC_ATTR_READNONE);
-       return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
-}
-
-/* Pack the two unsigned integers in args[] with llvm.amdgcn.cvt.pk.u16 and
- * return the v2i16 result bitcast to i32.
- *
- * bits must be 8, 10 or 16. The 8-bit and 10-bit clamping is for HW
- * workarounds: for those widths each input is first limited to the unsigned
- * maximum of that width. With bits == 10 and hi set, args[1] is treated as
- * alpha and limited to 3 instead. The clamped values are written back into
- * the caller's args[].
- */
-LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
-                                LLVMValueRef args[2], unsigned bits, bool hi)
-{
-       assert(bits == 8 || bits == 10 || bits == 16);
-
-       unsigned rgb_limit = bits == 8 ? 255 : bits == 10 ? 1023 : 65535;
-       LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, rgb_limit, 0);
-       LLVMValueRef max_alpha = max_rgb;
-
-       if (bits == 10)
-               max_alpha = LLVMConstInt(ctx->i32, 3, 0);
-
-       /* Clamp. */
-       if (bits != 16) {
-               for (int chan = 0; chan < 2; chan++) {
-                       bool is_alpha = hi && chan == 1;
-                       LLVMValueRef limit = is_alpha ? max_alpha : max_rgb;
-
-                       args[chan] = ac_build_umin(ctx, args[chan], limit);
-               }
-       }
-
-       LLVMValueRef packed =
-               ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
-                                  ctx->v2i16, args, 2,
-                                  AC_FUNC_ATTR_READNONE);
-       return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
-}
-
-/* Call llvm.amdgcn.wqm.vote on the boolean i1 and return its i1 result. */
-LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
-{
-       LLVMValueRef operands[1] = { i1 };
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
-                                 operands, 1, AC_FUNC_ATTR_READNONE);
-}
-
-/* Call llvm.amdgcn.kill with the boolean i1 as its only operand. */
-void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
-{
-       LLVMValueRef operands[1] = { i1 };
-
-       ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
-                          operands, 1, 0);
-}
-
-/* 32-bit bitfield extract of width bits starting at offset in input.
- * Emits llvm.amdgcn.sbfe.i32 when is_signed, llvm.amdgcn.ubfe.i32 otherwise.
- */
-LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
-                         LLVMValueRef offset, LLVMValueRef width,
-                         bool is_signed)
-{
-       LLVMValueRef operands[3] = { input, offset, width };
-
-       if (is_signed) {
-               return ac_build_intrinsic(ctx, "llvm.amdgcn.sbfe.i32",
-                                         ctx->i32, operands, 3,
-                                         AC_FUNC_ATTR_READNONE);
-       }
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.ubfe.i32",
-                                 ctx->i32, operands, 3,
-                                 AC_FUNC_ATTR_READNONE);
-}
-
-/* Integer multiply-add: returns s0 * s1 + s2 as separate mul and add. */
-LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
-                          LLVMValueRef s1, LLVMValueRef s2)
-{
-       LLVMValueRef product = LLVMBuildMul(ctx->builder, s0, s1, "");
-
-       return LLVMBuildAdd(ctx->builder, product, s2, "");
-}
-
-/* Float multiply-add: returns s0 * s1 + s2.
- *
- * Before GFX10 this is a separate fmul and fadd. FMA is better on GFX10,
- * because it has FMA units instead of MUL-ADD units, so there
- * llvm.fma.f32 is emitted instead.
- */
-LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
-                          LLVMValueRef s1, LLVMValueRef s2)
-{
-       if (ctx->chip_class < GFX10) {
-               LLVMValueRef product =
-                       LLVMBuildFMul(ctx->builder, s0, s1, "");
-
-               return LLVMBuildFAdd(ctx->builder, product, s2, "");
-       }
-
-       LLVMValueRef operands[3] = { s0, s1, s2 };
-
-       return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32,
-                                 operands, 3, AC_FUNC_ATTR_READNONE);
-}
-
-/* Emit a wait for the counters selected by the AC_WAIT_* bits in wait_flags.
- *
- * Nothing is emitted for wait_flags == 0. A release fence is emitted when
- * all three of LGKM, VLOAD and VSTORE are requested, or when a store wait is
- * requested on GFX10 and newer. Every other combination becomes an
- * llvm.amdgcn.s.waitcnt call with the requested counters set to zero.
- */
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
-{
-       if (!wait_flags)
-               return;
-
-       const bool wait_lgkm = (wait_flags & AC_WAIT_LGKM) != 0;
-       const bool wait_vload = (wait_flags & AC_WAIT_VLOAD) != 0;
-       const bool wait_vstore = (wait_flags & AC_WAIT_VSTORE) != 0;
-       /* On GFX10+ a store wait zeroes vscnt rather than vmcnt. */
-       const bool needs_vscnt = wait_vstore && ctx->chip_class >= GFX10;
-
-       /* There is no intrinsic for vscnt(0), so use a fence. */
-       if ((wait_lgkm && wait_vload && wait_vstore) || needs_vscnt) {
-               LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, "");
-               return;
-       }
-
-       /* Counters that are not waited on keep their maximum value. */
-       unsigned lgkmcnt = wait_lgkm ? 0 : 63;
-       unsigned vmcnt_max = ctx->chip_class >= GFX9 ? 63 : 15;
-       /* A store wait that reaches this point is on a pre-GFX10 chip. */
-       unsigned vmcnt = (wait_vload || wait_vstore) ? 0 : vmcnt_max;
-
-       unsigned simm16 = (lgkmcnt << 8) |
-                         (7 << 4) | /* expcnt */
-                         (vmcnt & 0xf) |
-                         ((vmcnt >> 4) << 14);
-
-       LLVMValueRef operand = LLVMConstInt(ctx->i32, simm16, false);
-
-       ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
-                          ctx->voidt, &operand, 1, 0);
-}
-
-/* Call llvm.amdgcn.fmed3 on src0, src1 and src2.
- *
- * bitsize selects the f16 (16) or f32 (32) variant; any other value selects
- * the f64 variant. The result has the selected float type.
- */
-LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                           LLVMValueRef src1, LLVMValueRef src2,
-                           unsigned bitsize)
-{
-       LLVMTypeRef type;
-       /* Points at string literals, so it must not be writable. */
-       const char *intr;
-
-       if (bitsize == 16) {
-               intr = "llvm.amdgcn.fmed3.f16";
-               type = ctx->f16;
-       } else if (bitsize == 32) {
-               intr = "llvm.amdgcn.fmed3.f32";
-               type = ctx->f32;
-       } else {
-               intr = "llvm.amdgcn.fmed3.f64";
-               type = ctx->f64;
-       }
-
-       LLVMValueRef params[] = {
-               src0,
-               src1,
-               src2,
-       };
-       return ac_build_intrinsic(ctx, intr, type, params, 3,
-                                 AC_FUNC_ATTR_READNONE);
-}
-
-/* Call llvm.amdgcn.fract on src0.
- *
- * bitsize selects the f16 (16) or f32 (32) variant; any other value selects
- * the f64 variant. The result has the selected float type.
- */
-LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                           unsigned bitsize)
-{
-       LLVMTypeRef type;
-       /* Points at string literals, so it must not be writable. */
-       const char *intr;
-
-       if (bitsize == 16) {
-               intr = "llvm.amdgcn.fract.f16";
-               type = ctx->f16;
-       } else if (bitsize == 32) {
-               intr = "llvm.amdgcn.fract.f32";
-               type = ctx->f32;
-       } else {
-               intr = "llvm.amdgcn.fract.f64";
-               type = ctx->f64;
-       }
-
-       LLVMValueRef params[] = {
-               src0,
-       };
-       return ac_build_intrinsic(ctx, intr, type, params, 1,
-                                 AC_FUNC_ATTR_READNONE);
-}
-
-/* Integer sign of src0 in an integer type of bitsize bits:
- * 1 for positive values, 0 for zero and -1 for negative values.
- */
-LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                           unsigned bitsize)
-{
-       LLVMBuilderRef b = ctx->builder;
-       LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, bitsize);
-       LLVMValueRef zero = LLVMConstInt(type, 0, false);
-       LLVMValueRef one = LLVMConstInt(type, 1, false);
-
-       /* Positive inputs collapse to 1, everything else stays as it is. */
-       LLVMValueRef is_positive = LLVMBuildICmp(b, LLVMIntSGT, src0, zero, "");
-       LLVMValueRef one_or_src = LLVMBuildSelect(b, is_positive, one, src0, "");
-
-       /* What is still negative collapses to -1. */
-       LLVMValueRef is_non_negative =
-               LLVMBuildICmp(b, LLVMIntSGE, one_or_src, zero, "");
-
-       return LLVMBuildSelect(b, is_non_negative, one_or_src,
-                              LLVMConstInt(type, -1, true), "");
-}
-
-/* Float sign of src0: 1.0 for values greater than zero, src0 itself when it
- * compares equal to zero, and -1.0 otherwise. Both compares are ordered, so
- * an input that fails both of them ends up as -1.0.
- *
- * bitsize selects f16 (16) or f32 (32); any other value selects f64.
- */
-LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                           unsigned bitsize)
-{
-       LLVMBuilderRef b = ctx->builder;
-       LLVMTypeRef type;
-       LLVMValueRef zero, one;
-
-       switch (bitsize) {
-       case 16:
-               type = ctx->f16;
-               zero = ctx->f16_0;
-               one = ctx->f16_1;
-               break;
-       case 32:
-               type = ctx->f32;
-               zero = ctx->f32_0;
-               one = ctx->f32_1;
-               break;
-       default:
-               type = ctx->f64;
-               zero = ctx->f64_0;
-               one = ctx->f64_1;
-               break;
-       }
-
-       LLVMValueRef is_positive = LLVMBuildFCmp(b, LLVMRealOGT, src0, zero, "");
-       LLVMValueRef one_or_src = LLVMBuildSelect(b, is_positive, one, src0, "");
-       LLVMValueRef is_non_negative =
-               LLVMBuildFCmp(b, LLVMRealOGE, one_or_src, zero, "");
-
-       return LLVMBuildSelect(b, is_non_negative, one_or_src,
-                              LLVMConstReal(type, -1.0), "");
-}
-
-/* Population count of src0, returned as an i32.
- *
- * The element width of src0 (8, 16, 32 or 64 bits) selects the matching
- * llvm.ctpop.* intrinsic; narrower results are zero-extended and the 64-bit
- * result is truncated to i32. Any other width is a caller bug.
- */
-LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
-{
-       LLVMValueRef result;
-       unsigned bitsize;
-
-       bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
-
-       switch (bitsize) {
-       case 64:
-               result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64,
-                                           (LLVMValueRef []) { src0 }, 1,
-                                           AC_FUNC_ATTR_READNONE);
-
-               result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
-               break;
-       case 32:
-               result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32,
-                                           (LLVMValueRef []) { src0 }, 1,
-                                           AC_FUNC_ATTR_READNONE);
-               break;
-       case 16:
-               result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16,
-                                           (LLVMValueRef []) { src0 }, 1,
-                                           AC_FUNC_ATTR_READNONE);
-
-               result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
-               break;
-       case 8:
-               result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8,
-                                           (LLVMValueRef []) { src0 }, 1,
-                                           AC_FUNC_ATTR_READNONE);
-
-               result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
-               break;
-       default:
-               /* Pass the message itself, as the other unreachable() calls
-                * in this file do; negating the literal turns it into 0.
-                */
-               unreachable("invalid bitsize");
-       }
-
-       return result;
-}
-
-/* Reverse the bit order of src0 and return the result as an i32.
- *
- * The element width of src0 (8, 16, 32 or 64 bits) selects the matching
- * llvm.bitreverse.* intrinsic; narrower results are zero-extended. Any other
- * width is a caller bug.
- */
-LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
-                                      LLVMValueRef src0)
-{
-       LLVMValueRef result;
-       unsigned bitsize;
-
-       bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
-
-       switch (bitsize) {
-       case 64:
-               result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64,
-                                           (LLVMValueRef []) { src0 }, 1,
-                                           AC_FUNC_ATTR_READNONE);
-
-               /* NOTE(review): the truncation keeps only the low 32 bits of
-                * the reversed value - confirm that this is intended.
-                */
-               result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
-               break;
-       case 32:
-               result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32,
-                                           (LLVMValueRef []) { src0 }, 1,
-                                           AC_FUNC_ATTR_READNONE);
-               break;
-       case 16:
-               result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16,
-                                           (LLVMValueRef []) { src0 }, 1,
-                                           AC_FUNC_ATTR_READNONE);
-
-               result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
-               break;
-       case 8:
-               result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8,
-                                           (LLVMValueRef []) { src0 }, 1,
-                                           AC_FUNC_ATTR_READNONE);
-
-               result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
-               break;
-       default:
-               /* Pass the message itself, as the other unreachable() calls
-                * in this file do; negating the literal turns it into 0.
-                */
-               unreachable("invalid bitsize");
-       }
-
-       return result;
-}
-
-/* Operand indices of an export call, as used with LLVMGetOperand() and
- * LLVMSetOperand(): the target, the enabled-channel mask, and the first of
- * the four consecutive output channel operands.
- */
-#define AC_EXP_TARGET          0
-#define AC_EXP_ENABLED_CHANNELS 1
-#define AC_EXP_OUT0            2
-
-/* Classification of one exported channel operand. */
-enum ac_ir_type {
-       AC_IR_UNDEF, /* the operand is undef */
-       AC_IR_CONST, /* the operand is a floating-point constant */
-       AC_IR_VALUE, /* any other value */
-};
-
-/* One channel of a parsed export instruction. */
-struct ac_vs_exp_chan
-{
-       LLVMValueRef value; /* the channel operand itself */
-       float const_float; /* constant value, filled in for AC_IR_CONST */
-       enum ac_ir_type type; /* how the operand was classified */
-};
-
-/* A parsed PARAM export instruction. */
-struct ac_vs_exp_inst {
-       unsigned offset; /* export target minus V_008DFC_SQ_EXP_PARAM */
-       LLVMValueRef inst; /* the export call instruction */
-       struct ac_vs_exp_chan chan[4]; /* the four output channels */
-};
-
-/* List of parsed export instructions; num entries of exp[] are in use. */
-struct ac_vs_exports {
-       unsigned num;
-       struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
-};
-
-/* Try to replace the PARAM export *exp by a DEFAULT_VAL setting.
- *
- * This only works when every channel is undef or the constant 0 or 1, and
- * the x/y/z channels can all be 0 or all be 1. In that case the export
- * instruction is erased, the first entry of vs_output_param_offset[] that
- * equals exp->offset is changed to AC_EXP_PARAM_DEFAULT_VAL_0000 plus the
- * selected default value, and true is returned.
- *
- * Return true if the PARAM export has been eliminated.
- */
-static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset,
-                                     uint32_t num_outputs,
-                                     struct ac_vs_exp_inst *exp)
-{
-       bool is_zero[4] = {false}, is_one[4] = {false};
-
-       for (unsigned c = 0; c < 4; c++) {
-               const struct ac_vs_exp_chan *chan = &exp->chan[c];
-
-               switch (chan->type) {
-               case AC_IR_UNDEF:
-                       /* Undef can stand in for either constant. */
-                       is_zero[c] = true;
-                       is_one[c] = true;
-                       break;
-               case AC_IR_CONST:
-                       if (chan->const_float == 0)
-                               is_zero[c] = true;
-                       else if (chan->const_float == 1)
-                               is_one[c] = true;
-                       else
-                               return false; /* other constant */
-                       break;
-               default:
-                       /* Not a constant expression. */
-                       return false;
-               }
-       }
-
-       /* Only certain combinations of 0 and 1 can be eliminated. */
-       unsigned default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
-
-       if (is_zero[0] && is_zero[1] && is_zero[2])
-               default_val = is_zero[3] ? 0 : 1;
-       else if (is_one[0] && is_one[1] && is_one[2])
-               default_val = is_zero[3] ? 2 : 3;
-       else
-               return false;
-
-       /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
-       LLVMInstructionEraseFromParent(exp->inst);
-
-       /* Change OFFSET to DEFAULT_VAL. */
-       for (unsigned out = 0; out < num_outputs; out++) {
-               if (vs_output_param_offset[out] != exp->offset)
-                       continue;
-
-               vs_output_param_offset[out] =
-                       AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
-               break;
-       }
-       return true;
-}
-
-/* Try to merge the PARAM export *exp into an already processed export that
- * has the same channel contents.
- *
- * On a match, undef channels of the matching export are filled in from
- * *exp, the instruction exp->inst is erased, the first entry of
- * vs_output_param_offset[] that equals exp->offset is redirected to the
- * offset of the match, and true is returned. Returns false when none of
- * the processed exports matches.
- */
-static bool ac_eliminate_duplicated_output(struct ac_llvm_context *ctx,
-                                          uint8_t *vs_output_param_offset,
-                                          uint32_t num_outputs,
-                                          struct ac_vs_exports *processed,
-                                          struct ac_vs_exp_inst *exp)
-{
-       /* copy_back_channels: bit j is set when channel j of the current
-        * candidate is undef while the same channel of exp is not.
-        */
-       unsigned p, copy_back_channels = 0;
-
-       /* See if the output is already in the list of processed outputs.
-        * The LLVMValueRef comparison relies on SSA.
-        */
-       for (p = 0; p < processed->num; p++) {
-               bool different = false;
-
-               for (unsigned j = 0; j < 4; j++) {
-                       struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
-                       struct ac_vs_exp_chan *c2 = &exp->chan[j];
-
-                       /* Treat undef as a match. */
-                       if (c2->type == AC_IR_UNDEF)
-                               continue;
-
-                       /* If c1 is undef but c2 isn't, we can copy c2 to c1
-                        * and consider the instruction duplicated.
-                        */
-                       if (c1->type == AC_IR_UNDEF) {
-                               copy_back_channels |= 1 << j;
-                               continue;
-                       }
-
-                       /* Test whether the channels are not equal. */
-                       if (c1->type != c2->type ||
-                           (c1->type == AC_IR_CONST &&
-                            c1->const_float != c2->const_float) ||
-                           (c1->type == AC_IR_VALUE &&
-                            c1->value != c2->value)) {
-                               different = true;
-                               break;
-                       }
-               }
-               if (!different)
-                       break;
-
-               /* This candidate differed; drop the channels noted for it. */
-               copy_back_channels = 0;
-       }
-       /* The loop ran to completion, so nothing matched. */
-       if (p == processed->num)
-               return false;
-
-       /* If a match was found, but the matching export has undef where the new
-        * one has a normal value, copy the normal value to the undef channel.
-        */
-       struct ac_vs_exp_inst *match = &processed->exp[p];
-
-       /* Get current enabled channels mask. */
-       LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS);
-       unsigned enabled_channels = LLVMConstIntGetZExtValue(arg);
-
-       while (copy_back_channels) {
-               unsigned chan = u_bit_scan(&copy_back_channels);
-
-               assert(match->chan[chan].type == AC_IR_UNDEF);
-               LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan,
-                              exp->chan[chan].value);
-               match->chan[chan] = exp->chan[chan];
-
-               /* Update number of enabled channels because the original mask
-                * is not always 0xf.
-                */
-               enabled_channels |= (1 << chan);
-               LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS,
-                              LLVMConstInt(ctx->i32, enabled_channels, 0));
-       }
-
-       /* The PARAM export is duplicated. Kill it. */
-       LLVMInstructionEraseFromParent(exp->inst);
-
-       /* Change OFFSET to the matching export. */
-       for (unsigned i = 0; i < num_outputs; i++) {
-               if (vs_output_param_offset[i] == exp->offset) {
-                       vs_output_param_offset[i] = match->offset;
-                       break;
-               }
-       }
-       return true;
-}
-
-/**
- * Scan every instruction of main_fn for PARAM export calls and try to drop
- * the ones that are constant or duplicated, then renumber the survivors so
- * that their PARAM targets are consecutive.
- *
- * @param ctx                     LLVM build context (used for the i32 type).
- * @param main_fn                 function whose basic blocks are scanned.
- * @param vs_output_param_offset  per-output PARAM offsets; rewritten in place.
- * @param num_outputs             number of valid entries in the array above.
- * @param num_param_exports       receives the number of remaining PARAM
- *                                exports, but only if something was removed.
- */
-void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
-                           LLVMValueRef main_fn,
-                           uint8_t *vs_output_param_offset,
-                           uint32_t num_outputs,
-                           uint8_t *num_param_exports)
-{
-       LLVMBasicBlockRef bb;
-       bool removed_any = false;
-       struct ac_vs_exports exports;
-
-       exports.num = 0;
-
-       /* Process all LLVM instructions. */
-       bb = LLVMGetFirstBasicBlock(main_fn);
-       while (bb) {
-               LLVMValueRef inst = LLVMGetFirstInstruction(bb);
-
-               while (inst) {
-                       /* Advance before processing: the duplicate
-                        * elimination below may erase "cur".
-                        */
-                       LLVMValueRef cur = inst;
-                       inst = LLVMGetNextInstruction(inst);
-                       struct ac_vs_exp_inst exp;
-
-                       if (LLVMGetInstructionOpcode(cur) != LLVMCall)
-                               continue;
-
-                       LLVMValueRef callee = ac_llvm_get_called_value(cur);
-
-                       if (!ac_llvm_is_function(callee))
-                               continue;
-
-                       const char *name = LLVMGetValueName(callee);
-                       unsigned num_args = LLVMCountParams(callee);
-
-                       /* Check if this is an export instruction. */
-                       if ((num_args != 9 && num_args != 8) ||
-                           (strcmp(name, "llvm.SI.export") &&
-                            strcmp(name, "llvm.amdgcn.exp.f32")))
-                               continue;
-
-                       LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
-                       unsigned target = LLVMConstIntGetZExtValue(arg);
-
-                       /* Only targets at or above the PARAM base are
-                        * considered; everything below is left alone.
-                        */
-                       if (target < V_008DFC_SQ_EXP_PARAM)
-                               continue;
-
-                       /* From here on "target" is the PARAM index. */
-                       target -= V_008DFC_SQ_EXP_PARAM;
-
-                       /* Parse the instruction. */
-                       memset(&exp, 0, sizeof(exp));
-                       exp.offset = target;
-                       exp.inst = cur;
-
-                       /* Classify each of the 4 channels as undef, FP
-                        * constant, or arbitrary value.
-                        */
-                       for (unsigned i = 0; i < 4; i++) {
-                               LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);
-
-                               exp.chan[i].value = v;
-
-                               if (LLVMIsUndef(v)) {
-                                       exp.chan[i].type = AC_IR_UNDEF;
-                               } else if (LLVMIsAConstantFP(v)) {
-                                       LLVMBool loses_info;
-                                       exp.chan[i].type = AC_IR_CONST;
-                                       exp.chan[i].const_float =
-                                               LLVMConstRealGetDouble(v, &loses_info);
-                               } else {
-                                       exp.chan[i].type = AC_IR_VALUE;
-                               }
-                       }
-
-                       /* Eliminate constant and duplicated PARAM exports. */
-                       if (ac_eliminate_const_output(vs_output_param_offset,
-                                                     num_outputs, &exp) ||
-                           ac_eliminate_duplicated_output(ctx,
-                                                          vs_output_param_offset,
-                                                          num_outputs, &exports,
-                                                          &exp)) {
-                               removed_any = true;
-                       } else {
-                               /* NOTE(review): no bounds check against the
-                                * capacity of exports.exp[] here; the struct
-                                * is declared outside this view - confirm it
-                                * can hold every PARAM export.
-                                */
-                               exports.exp[exports.num++] = exp;
-                       }
-               }
-               bb = LLVMGetNextBasicBlock(bb);
-       }
-
-       /* Remove holes in export memory due to removed PARAM exports.
-        * This is done by renumbering all PARAM exports.
-        */
-       if (removed_any) {
-               uint8_t old_offset[VARYING_SLOT_MAX];
-               unsigned out, i;
-
-               /* Make a copy of the offsets. We need the old version while
-                * we are modifying some of them.
-                *
-                * NOTE(review): this copies VARYING_SLOT_MAX bytes
-                * regardless of num_outputs, so the caller's array must be
-                * at least that large - confirm against callers.
-                */
-               memcpy(old_offset, vs_output_param_offset,
-                      sizeof(old_offset));
-
-               for (i = 0; i < exports.num; i++) {
-                       unsigned offset = exports.exp[i].offset;
-
-                       /* Update vs_output_param_offset. Multiple outputs can
-                        * have the same offset.
-                        */
-                       for (out = 0; out < num_outputs; out++) {
-                               if (old_offset[out] == offset)
-                                       vs_output_param_offset[out] = i;
-                       }
-
-                       /* Change the PARAM offset in the instruction. */
-                       LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
-                                      LLVMConstInt(ctx->i32,
-                                                   V_008DFC_SQ_EXP_PARAM + i, 0));
-               }
-               *num_param_exports = exports.num;
-       }
-}
-
-/* Emit llvm.amdgcn.init.exec with an all-ones 64-bit mask. */
-void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
-{
-       LLVMValueRef args[1] = {
-               LLVMConstInt(ctx->i64, ~0ull, 0),
-       };
-
-       ac_build_intrinsic(ctx, "llvm.amdgcn.init.exec", ctx->voidt,
-                          args, 1, AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* Set ctx->lds to address 0 of the LDS address space, typed as a pointer to
- * an i32 array covering 64 KiB on GFX7 and newer, 32 KiB otherwise.
- */
-void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
-{
-       unsigned size_in_bytes = 32768;
-
-       if (ctx->chip_class >= GFX7)
-               size_in_bytes = 65536;
-
-       LLVMTypeRef array_type = LLVMArrayType(ctx->i32, size_in_bytes / 4);
-       LLVMTypeRef ptr_type = LLVMPointerType(array_type, AC_ADDR_SPACE_LDS);
-
-       ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0, ptr_type, "lds");
-}
-
-/* Load the element of ctx->lds selected by dw_addr. */
-LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
-                        LLVMValueRef dw_addr)
-{
-       LLVMValueRef elem_ptr = ac_build_gep0(ctx, ctx->lds, dw_addr);
-
-       return LLVMBuildLoad(ctx->builder, elem_ptr, "");
-}
-
-/* Convert value with ac_to_integer() and store it into ctx->lds at index
- * dw_addr.
- */
-void ac_lds_store(struct ac_llvm_context *ctx,
-                 LLVMValueRef dw_addr,
-                 LLVMValueRef value)
-{
-       LLVMValueRef int_value = ac_to_integer(ctx, value);
-
-       ac_build_indexed_store(ctx, ctx->lds, dw_addr, int_value);
-}
-
-/**
- * Build a "find least significant set bit" of src0 using llvm.cttz.
- *
- * Supports 8-, 16-, 32- and 64-bit sources. The result is always i32 and is
- * -1 when src0 is zero. dst_type is not used by the implementation.
- */
-LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
-                        LLVMTypeRef dst_type,
-                        LLVMValueRef src0)
-{
-       const unsigned bit_size = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
-       const char *cttz_name;
-       LLVMTypeRef cttz_type;
-       LLVMValueRef src_zero;
-
-       /* Pick the intrinsic overload and the matching zero constant. */
-       switch (bit_size) {
-       case 8:
-               cttz_name = "llvm.cttz.i8";
-               cttz_type = ctx->i8;
-               src_zero = ctx->i8_0;
-               break;
-       case 16:
-               cttz_name = "llvm.cttz.i16";
-               cttz_type = ctx->i16;
-               src_zero = ctx->i16_0;
-               break;
-       case 32:
-               cttz_name = "llvm.cttz.i32";
-               cttz_type = ctx->i32;
-               src_zero = ctx->i32_0;
-               break;
-       case 64:
-               cttz_name = "llvm.cttz.i64";
-               cttz_type = ctx->i64;
-               src_zero = ctx->i64_0;
-               break;
-       default:
-               unreachable(!"invalid bitsize");
-       }
-
-       /* The second operand set to 1 means that ffs(x=0) = undef, so LLVM
-        * won't add special code to check for x=0. The reason is that the
-        * LLVM behavior for x=0 is different from what we need here.
-        * However, LLVM also assumes that ffs(x) is in [0, 31], but GLSL
-        * expects that ffs(0) = -1, so a conditional assignment to handle 0
-        * is still required.
-        *
-        * The hardware already implements the correct behavior.
-        */
-       LLVMValueRef cttz_args[2] = { src0, ctx->i1true };
-       LLVMValueRef bit_index = ac_build_intrinsic(ctx, cttz_name, cttz_type,
-                                                   cttz_args, 2,
-                                                   AC_FUNC_ATTR_READNONE);
-
-       /* Bring the intrinsic result to i32. */
-       if (bit_size == 64)
-               bit_index = LLVMBuildTrunc(ctx->builder, bit_index, ctx->i32, "");
-       else if (bit_size < 32)
-               bit_index = LLVMBuildSExt(ctx->builder, bit_index, ctx->i32, "");
-
-       /* TODO: We need an intrinsic to skip this conditional. */
-       /* Check for zero: */
-       LLVMValueRef src_is_zero = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
-                                                src0, src_zero, "");
-
-       return LLVMBuildSelect(ctx->builder, src_is_zero,
-                              LLVMConstInt(ctx->i32, -1, 0), bit_index, "");
-}
-
-/* Return the pointer-to-elem_type type in AC_ADDR_SPACE_CONST. */
-LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
-{
-       LLVMTypeRef ptr_type = LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST);
-
-       return ptr_type;
-}
-
-/* Return the pointer-to-elem_type type in AC_ADDR_SPACE_CONST_32BIT. */
-LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
-{
-       LLVMTypeRef ptr_type =
-               LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST_32BIT);
-
-       return ptr_type;
-}
-
-/* Return the top entry of the control-flow stack, or NULL if it is empty. */
-static struct ac_llvm_flow *
-get_current_flow(struct ac_llvm_context *ctx)
-{
-       const unsigned depth = ctx->flow->depth;
-
-       if (depth == 0)
-               return NULL;
-
-       return &ctx->flow->stack[depth - 1];
-}
-
-/* Walk the control-flow stack from the top and return the first entry that
- * has a loop_entry_block, or NULL if there is none.
- */
-static struct ac_llvm_flow *
-get_innermost_loop(struct ac_llvm_context *ctx)
-{
-       unsigned level = ctx->flow->depth;
-
-       while (level > 0) {
-               struct ac_llvm_flow *entry = &ctx->flow->stack[--level];
-
-               if (entry->loop_entry_block)
-                       return entry;
-       }
-       return NULL;
-}
-
-/**
- * Push a new, cleared entry onto the control-flow stack and return it.
- *
- * The stack grows geometrically (at least AC_LLVM_INITIAL_CF_DEPTH entries).
- * The realloc() result is checked before it replaces the old pointer, so a
- * failed allocation neither leaks the existing stack nor leads to a NULL
- * dereference; the process is aborted with a message instead.
- */
-static struct ac_llvm_flow *
-push_flow(struct ac_llvm_context *ctx)
-{
-       struct ac_llvm_flow *flow;
-
-       if (ctx->flow->depth >= ctx->flow->depth_max) {
-               unsigned new_max = MAX2(ctx->flow->depth << 1,
-                                       AC_LLVM_INITIAL_CF_DEPTH);
-               struct ac_llvm_flow *new_stack =
-                       realloc(ctx->flow->stack,
-                               new_max * sizeof(*ctx->flow->stack));
-
-               if (!new_stack) {
-                       fprintf(stderr, "ac: out of memory growing the "
-                                       "control-flow stack\n");
-                       abort();
-               }
-
-               ctx->flow->stack = new_stack;
-               ctx->flow->depth_max = new_max;
-       }
-
-       flow = &ctx->flow->stack[ctx->flow->depth];
-       ctx->flow->depth++;
-
-       flow->next_block = NULL;
-       flow->loop_entry_block = NULL;
-       return flow;
-}
-
-/* Name bb "<base><label_id>"; the name is truncated to fit 32 bytes
- * including the terminator.
- */
-static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base,
-                               int label_id)
-{
-       char label[32];
-       LLVMValueRef bb_value = LLVMBasicBlockAsValue(bb);
-
-       snprintf(label, sizeof(label), "%s%d", base, label_id);
-       LLVMSetValueName(bb_value, label);
-}
-
-/* Create a basic block at the level of the parent flow: with no parent on
- * the stack it is appended to the current function, otherwise it is inserted
- * before the parent's next_block.
- */
-static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx,
-                                           const char *name)
-{
-       assert(ctx->flow->depth >= 1);
-
-       if (ctx->flow->depth < 2) {
-               LLVMBasicBlockRef cur_block = LLVMGetInsertBlock(ctx->builder);
-               LLVMValueRef main_fn = LLVMGetBasicBlockParent(cur_block);
-
-               return LLVMAppendBasicBlockInContext(ctx->context, main_fn,
-                                                    name);
-       }
-
-       struct ac_llvm_flow *parent = &ctx->flow->stack[ctx->flow->depth - 2];
-
-       return LLVMInsertBasicBlockInContext(ctx->context,
-                                            parent->next_block, name);
-}
-
-/* Branch from the current block to target unless the block already has a
- * terminator (for example a branch emitted by a break or continue).
- */
-static void emit_default_branch(LLVMBuilderRef builder,
-                               LLVMBasicBlockRef target)
-{
-       LLVMBasicBlockRef cur_block = LLVMGetInsertBlock(builder);
-
-       if (LLVMGetBasicBlockTerminator(cur_block))
-               return;
-
-       LLVMBuildBr(builder, target);
-}
-
-/* Open a loop: push a flow entry, create the loop and end-of-loop blocks,
- * branch into the loop block and continue building there.
- */
-void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id)
-{
-       struct ac_llvm_flow *loop = push_flow(ctx);
-       LLVMBasicBlockRef entry_block = append_basic_block(ctx, "LOOP");
-
-       loop->loop_entry_block = entry_block;
-       loop->next_block = append_basic_block(ctx, "ENDLOOP");
-
-       set_basicblock_name(entry_block, "loop", label_id);
-       LLVMBuildBr(ctx->builder, entry_block);
-       LLVMPositionBuilderAtEnd(ctx->builder, entry_block);
-}
-
-/* Branch to the block following the innermost enclosing loop.
- * Must only be called while a loop is open.
- */
-void ac_build_break(struct ac_llvm_context *ctx)
-{
-       struct ac_llvm_flow *flow = get_innermost_loop(ctx);
-
-       /* get_innermost_loop() returns NULL when no loop is on the stack. */
-       assert(flow);
-       LLVMBuildBr(ctx->builder, flow->next_block);
-}
-
-/* Branch back to the entry block of the innermost enclosing loop.
- * Must only be called while a loop is open.
- */
-void ac_build_continue(struct ac_llvm_context *ctx)
-{
-       struct ac_llvm_flow *flow = get_innermost_loop(ctx);
-
-       /* get_innermost_loop() returns NULL when no loop is on the stack. */
-       assert(flow);
-       LLVMBuildBr(ctx->builder, flow->loop_entry_block);
-}
-
-/* Switch from the "if" part of the current branch to its "else" part: the
- * pending next_block becomes the else block and a new ENDIF block becomes
- * the join point.
- */
-void ac_build_else(struct ac_llvm_context *ctx, int label_id)
-{
-       struct ac_llvm_flow *branch = get_current_flow(ctx);
-
-       assert(!branch->loop_entry_block);
-
-       LLVMBasicBlockRef join_block = append_basic_block(ctx, "ENDIF");
-       emit_default_branch(ctx->builder, join_block);
-
-       LLVMBasicBlockRef else_block = branch->next_block;
-       LLVMPositionBuilderAtEnd(ctx->builder, else_block);
-       set_basicblock_name(else_block, "else", label_id);
-
-       branch->next_block = join_block;
-}
-
-/* Close the current branch: fall through to its next_block, continue
- * building there and pop the flow entry.
- */
-void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
-{
-       struct ac_llvm_flow *branch = get_current_flow(ctx);
-
-       assert(!branch->loop_entry_block);
-
-       LLVMBasicBlockRef join_block = branch->next_block;
-
-       emit_default_branch(ctx->builder, join_block);
-       LLVMPositionBuilderAtEnd(ctx->builder, join_block);
-       set_basicblock_name(join_block, "endif", label_id);
-
-       ctx->flow->depth--;
-}
-
-/* Close the current loop: branch back to its entry block unless the block
- * is already terminated, continue building in the block after the loop and
- * pop the flow entry.
- */
-void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
-{
-       struct ac_llvm_flow *loop = get_current_flow(ctx);
-
-       assert(loop->loop_entry_block);
-
-       emit_default_branch(ctx->builder, loop->loop_entry_block);
-
-       LLVMBasicBlockRef after_loop = loop->next_block;
-
-       LLVMPositionBuilderAtEnd(ctx->builder, after_loop);
-       set_basicblock_name(after_loop, "endloop", label_id);
-       ctx->flow->depth--;
-}
-
-/* Open a conditional on the i1 value cond: push a flow entry, create the
- * "if" and "else" blocks, emit the conditional branch and continue building
- * in the "if" block.
- */
-void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
-{
-       struct ac_llvm_flow *branch = push_flow(ctx);
-       LLVMBasicBlockRef then_block = append_basic_block(ctx, "IF");
-       LLVMBasicBlockRef else_block = append_basic_block(ctx, "ELSE");
-
-       branch->next_block = else_block;
-       set_basicblock_name(then_block, "if", label_id);
-       LLVMBuildCondBr(ctx->builder, cond, then_block, else_block);
-       LLVMPositionBuilderAtEnd(ctx->builder, then_block);
-}
-
-/* Open a conditional that is taken when the float value compares
- * unordered-or-not-equal to 0.0.
- */
-void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
-                int label_id)
-{
-       LLVMValueRef is_nonzero;
-
-       is_nonzero = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, value,
-                                  ctx->f32_0, "");
-       ac_build_ifcc(ctx, is_nonzero, label_id);
-}
-
-/* Open a conditional that is taken when value, converted with
- * ac_to_integer(), is not equal to the i32 zero constant.
- */
-void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
-                 int label_id)
-{
-       LLVMValueRef int_value = ac_to_integer(ctx, value);
-       LLVMValueRef is_nonzero = LLVMBuildICmp(ctx->builder, LLVMIntNE,
-                                               int_value, ctx->i32_0, "");
-
-       ac_build_ifcc(ctx, is_nonzero, label_id);
-}
-
-/* Emit an alloca of the given type at the very start of the entry block of
- * the function currently being built. The memory is left uninitialized.
- * A temporary builder is used, so ac->builder keeps its position.
- */
-LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
-                            const char *name)
-{
-       LLVMBasicBlockRef cur_block = LLVMGetInsertBlock(ac->builder);
-       LLVMValueRef fn = LLVMGetBasicBlockParent(cur_block);
-       LLVMBasicBlockRef entry_block = LLVMGetEntryBasicBlock(fn);
-       LLVMValueRef entry_first = LLVMGetFirstInstruction(entry_block);
-       LLVMBuilderRef tmp_builder = LLVMCreateBuilderInContext(ac->context);
-
-       /* An empty entry block has no instruction to insert before. */
-       if (!entry_first)
-               LLVMPositionBuilderAtEnd(tmp_builder, entry_block);
-       else
-               LLVMPositionBuilderBefore(tmp_builder, entry_first);
-
-       LLVMValueRef slot = LLVMBuildAlloca(tmp_builder, type, name);
-
-       LLVMDisposeBuilder(tmp_builder);
-       return slot;
-}
-
-/* Like ac_build_alloca_undef(), but also stores a null constant of the
- * given type into the new slot at the current builder position.
- */
-LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac,
-                                  LLVMTypeRef type, const char *name)
-{
-       LLVMValueRef slot = ac_build_alloca_undef(ac, type, name);
-       LLVMValueRef null_value = LLVMConstNull(type);
-
-       LLVMBuildStore(ac->builder, null_value, slot);
-       return slot;
-}
-
-/* Bitcast ptr to a pointer to "type", keeping its address space. */
-LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
-                         LLVMTypeRef type)
-{
-       int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
-       LLVMTypeRef new_ptr_type = LLVMPointerType(type, addr_space);
-
-       return LLVMBuildBitCast(ctx->builder, ptr, new_ptr_type, "");
-}
-
-/* Return the first "count" components of value.
- *
- * value is returned unchanged when it already has "count" components, a
- * scalar is extracted when count is 1, and otherwise a shuffle selecting
- * components 0..count-1 is built.
- */
-LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
-                           unsigned count)
-{
-       if (ac_get_llvm_num_components(value) == count)
-               return value;
-
-       if (count == 1)
-               return LLVMBuildExtractElement(ctx->builder, value,
-                                              ctx->i32_0, "");
-
-       /* At least two slots so the two fixed entries always fit. */
-       LLVMValueRef indices[MAX2(count, 2)];
-
-       indices[0] = ctx->i32_0;
-       indices[1] = ctx->i32_1;
-       for (unsigned comp = 2; comp < count; comp++)
-               indices[comp] = LLVMConstInt(ctx->i32, comp, false);
-
-       return LLVMBuildShuffleVector(ctx->builder, value, value,
-                                     LLVMConstVector(indices, count), "");
-}
-
-/**
- * Extract the bitfield [rshift, rshift + bitwidth) from param.
- *
- * The shift is skipped when rshift is 0, and the mask is skipped when the
- * field reaches bit 31 (the logical shift has already cleared the upper
- * bits). The shift amount and mask are emitted as i32 constants.
- */
-LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
-                            unsigned rshift, unsigned bitwidth)
-{
-       LLVMValueRef value = param;
-       if (rshift)
-               value = LLVMBuildLShr(ctx->builder, value,
-                                     LLVMConstInt(ctx->i32, rshift, false), "");
-
-       if (rshift + bitwidth < 32) {
-               /* Shift an unsigned 1: bitwidth can be 31 here (rshift == 0),
-                * and "1 << 31" on a signed int is undefined behavior.
-                */
-               unsigned mask = (1u << bitwidth) - 1;
-               value = LLVMBuildAnd(ctx->builder, value,
-                                    LLVMConstInt(ctx->i32, mask, false), "");
-       }
-       return value;
-}
-
-/* Remap the MSAA sample index in addr[] through FMASK.
- *
- * FMASK holds one nibble per logical sample; each nibble names the physical
- * sample to fetch. For uncompressed MSAA surfaces FMASK should return
- * 0x76543210, the identity mapping.
- *
- * For example, 0x11111100 means only 2 samples are stored and the second
- * one covers 3/4 of the pixel: reading samples 0 and 1 returns physical
- * sample 0 (the two low 0 nibbles), anything else returns physical sample 1.
- *
- * The adjustment is:
- *   addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
- *
- * The sample index lives in addr[3] for array textures and addr[2] otherwise.
- */
-void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
-                             LLVMValueRef *addr, bool is_array_tex)
-{
-       LLVMBuilderRef builder = ac->builder;
-       const unsigned sample_chan = is_array_tex ? 3 : 2;
-
-       /* Load the FMASK texel at the same coordinates. */
-       struct ac_image_args load_args = {};
-       load_args.opcode = ac_image_load;
-       load_args.resource = fmask;
-       load_args.dmask = 0xf;
-       load_args.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
-       load_args.attributes = AC_FUNC_ATTR_READNONE;
-       load_args.coords[0] = addr[0];
-       load_args.coords[1] = addr[1];
-       if (is_array_tex)
-               load_args.coords[2] = addr[2];
-
-       LLVMValueRef fmask_texel = ac_build_image_opcode(ac, &load_args);
-       LLVMValueRef fmask_bits =
-               LLVMBuildExtractElement(builder, fmask_texel, ac->i32_0, "");
-
-       /* Apply the formula. */
-       LLVMValueRef nibble_shift =
-               LLVMBuildMul(builder, addr[sample_chan],
-                            LLVMConstInt(ac->i32, 4, 0), "");
-       LLVMValueRef remapped =
-               LLVMBuildLShr(builder, fmask_bits, nibble_shift, "");
-
-       /* Mask the sample index by 0x7, because 0x8 means an unknown value
-        * with EQAA, so those will map to 0. */
-       remapped = LLVMBuildAnd(builder, remapped,
-                               LLVMConstInt(ac->i32, 0x7, 0), "");
-
-       /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
-        * resource descriptor is 0 (invalid).
-        */
-       LLVMValueRef desc = LLVMBuildBitCast(builder, fmask, ac->v8i32, "");
-       LLVMValueRef desc_word1 =
-               LLVMBuildExtractElement(builder, desc, ac->i32_1, "");
-       LLVMValueRef fmask_valid =
-               LLVMBuildICmp(builder, LLVMIntNE, desc_word1, ac->i32_0, "");
-
-       /* Replace the MSAA sample index. */
-       addr[sample_chan] = LLVMBuildSelect(builder, fmask_valid, remapped,
-                                           addr[sample_chan], "");
-}
-
-/* Emit llvm.amdgcn.readlane on src, or llvm.amdgcn.readfirstlane when lane
- * is NULL. An optimization barrier is applied to src first.
- */
-static LLVMValueRef
-_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
-{
-       const bool first_lane = lane == NULL;
-       const char *intrin = first_lane ? "llvm.amdgcn.readfirstlane"
-                                       : "llvm.amdgcn.readlane";
-
-       /* May replace src, so it must run before the operands are set up. */
-       ac_build_optimization_barrier(ctx, &src);
-
-       LLVMValueRef args[2] = { src, lane };
-
-       return ac_build_intrinsic(ctx, intrin, LLVMTypeOf(src), args,
-                                 first_lane ? 1 : 2,
-                                 AC_FUNC_ATTR_READNONE |
-                                 AC_FUNC_ATTR_CONVERGENT);
-}
-
-/**
- * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
- *
- * Values wider than 32 bits are split into i32 pieces that are read one by
- * one and reassembled; the result has the type of the original src.
- *
- * @param ctx
- * @param src
- * @param lane - id of the lane or NULL for the first active lane
- * @return value of the lane
- */
-LLVMValueRef
-ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
-{
-       LLVMTypeRef orig_type = LLVMTypeOf(src);
-       LLVMValueRef result;
-
-       src = ac_to_integer(ctx, src);
-       unsigned width = LLVMGetIntTypeWidth(LLVMTypeOf(src));
-
-       if (width == 32) {
-               result = _ac_build_readlane(ctx, src, lane);
-       } else {
-               assert(width % 32 == 0);
-               const unsigned num_dwords = width / 32;
-               LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
-               LLVMValueRef dwords =
-                       LLVMBuildBitCast(ctx->builder, src, dwords_type, "");
-
-               result = LLVMGetUndef(dwords_type);
-               for (unsigned i = 0; i < num_dwords; i++) {
-                       LLVMValueRef idx = LLVMConstInt(ctx->i32, i, 0);
-                       LLVMValueRef dword =
-                               LLVMBuildExtractElement(ctx->builder, dwords,
-                                                       idx, "");
-                       LLVMValueRef lane_dword =
-                               _ac_build_readlane(ctx, dword, lane);
-
-                       result = LLVMBuildInsertElement(ctx->builder, result,
-                                                       lane_dword, idx, "");
-               }
-       }
-
-       /* Pointers cannot be bitcast from integers; convert them back. */
-       if (LLVMGetTypeKind(orig_type) == LLVMPointerTypeKind)
-               return LLVMBuildIntToPtr(ctx->builder, result, orig_type, "");
-
-       return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
-}
-
-/* Emit llvm.amdgcn.writelane. Note the operand order passed to the
- * intrinsic: (value, lane, src). The result type is i32.
- */
-LLVMValueRef
-ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane)
-{
-       LLVMValueRef args[3] = { value, lane, src };
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32,
-                                 args, 3,
-                                 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* Emit the mbcnt intrinsics for mask.
- *
- * With a wave size of 32 only llvm.amdgcn.mbcnt.lo is used. Otherwise mask
- * is split into two i32 halves and mbcnt.lo is chained into mbcnt.hi.
- */
-LLVMValueRef
-ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
-{
-       if (ctx->wave_size == 32) {
-               LLVMValueRef args[2] = { mask, ctx->i32_0 };
-
-               return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
-                                         args, 2, AC_FUNC_ATTR_READNONE);
-       }
-
-       LLVMTypeRef v2i32 = LLVMVectorType(ctx->i32, 2);
-       LLVMValueRef halves = LLVMBuildBitCast(ctx->builder, mask, v2i32, "");
-       LLVMValueRef lo_half = LLVMBuildExtractElement(ctx->builder, halves,
-                                                      ctx->i32_0, "");
-       LLVMValueRef hi_half = LLVMBuildExtractElement(ctx->builder, halves,
-                                                      ctx->i32_1, "");
-
-       LLVMValueRef lo_args[2] = { lo_half, ctx->i32_0 };
-       LLVMValueRef count = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo",
-                                               ctx->i32, lo_args, 2,
-                                               AC_FUNC_ATTR_READNONE);
-
-       /* The low-half count is the accumulator input of mbcnt.hi. */
-       LLVMValueRef hi_args[2] = { hi_half, count };
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32,
-                                 hi_args, 2, AC_FUNC_ATTR_READNONE);
-}
-
-/* Values for the dpp_ctrl operand that _ac_build_dpp() passes to
- * llvm.amdgcn.update.dpp.
- *
- * Entries starting with an underscore are base values: dpp_quad_perm(),
- * dpp_row_sl() and dpp_row_sr() OR a lane selector or shift amount into
- * them. NOTE(review): no helper for _dpp_row_rr is visible in this part of
- * the file; presumably it takes an amount like the other row entries.
- */
-enum dpp_ctrl {
-       _dpp_quad_perm = 0x000,
-       _dpp_row_sl = 0x100,
-       _dpp_row_sr = 0x110,
-       _dpp_row_rr = 0x120,
-       dpp_wf_sl1 = 0x130,
-       dpp_wf_rl1 = 0x134,
-       dpp_wf_sr1 = 0x138,
-       dpp_wf_rr1 = 0x13C,
-       dpp_row_mirror = 0x140,
-       dpp_row_half_mirror = 0x141,
-       dpp_row_bcast15 = 0x142,
-       dpp_row_bcast31 = 0x143
-};
-
-/* Encode a quad permutation: each laneN (0..3) takes two bits, lane0 in the
- * lowest position.
- */
-static inline enum dpp_ctrl
-dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
-{
-       assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
-
-       unsigned selectors = lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6);
-
-       return _dpp_quad_perm | selectors;
-}
-
-/* Combine _dpp_row_sl with an amount in the range 1..15. */
-static inline enum dpp_ctrl
-dpp_row_sl(unsigned amount)
-{
-       assert(amount > 0);
-       assert(amount < 16);
-
-       return _dpp_row_sl | amount;
-}
-
-/* Combine _dpp_row_sr with an amount in the range 1..15. */
-static inline enum dpp_ctrl
-dpp_row_sr(unsigned amount)
-{
-       assert(amount > 0);
-       assert(amount < 16);
-
-       return _dpp_row_sr | amount;
-}
-
-/* Emit one llvm.amdgcn.update.dpp.i32 call. The result has the type of
- * "old"; the control word and masks are passed as i32 constants and
- * bound_ctrl as an i1 constant.
- */
-static LLVMValueRef
-_ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
-             enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
-             bool bound_ctrl)
-{
-       LLVMValueRef args[6] = {
-               old,
-               src,
-               LLVMConstInt(ctx->i32, dpp_ctrl, 0),
-               LLVMConstInt(ctx->i32, row_mask, 0),
-               LLVMConstInt(ctx->i32, bank_mask, 0),
-               LLVMConstInt(ctx->i1, bound_ctrl, 0),
-       };
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.update.dpp.i32",
-                                 LLVMTypeOf(old), args, 6,
-                                 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* Apply a DPP operation to src and old of any width that is a multiple of
- * 32 bits.
- *
- * Both operands are converted with ac_to_integer(). Wider values are split
- * into i32 pieces that are processed one by one; the result is bitcast back
- * to the original type of src.
- */
-static LLVMValueRef
-ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
-            enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
-            bool bound_ctrl)
-{
-       LLVMTypeRef orig_type = LLVMTypeOf(src);
-       LLVMValueRef result;
-
-       src = ac_to_integer(ctx, src);
-       old = ac_to_integer(ctx, old);
-       unsigned width = LLVMGetIntTypeWidth(LLVMTypeOf(src));
-
-       if (width == 32) {
-               result = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask,
-                                      bank_mask, bound_ctrl);
-       } else {
-               assert(width % 32 == 0);
-               const unsigned num_dwords = width / 32;
-               LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
-               LLVMValueRef src_dwords =
-                       LLVMBuildBitCast(ctx->builder, src, dwords_type, "");
-               LLVMValueRef old_dwords =
-                       LLVMBuildBitCast(ctx->builder, old, dwords_type, "");
-
-               result = LLVMGetUndef(dwords_type);
-               for (unsigned i = 0; i < num_dwords; i++) {
-                       LLVMValueRef idx = LLVMConstInt(ctx->i32, i, 0);
-                       LLVMValueRef src_dword =
-                               LLVMBuildExtractElement(ctx->builder,
-                                                       src_dwords, idx, "");
-                       LLVMValueRef old_dword =
-                               LLVMBuildExtractElement(ctx->builder,
-                                                       old_dwords, idx, "");
-                       LLVMValueRef dpp_dword =
-                               _ac_build_dpp(ctx, old_dword, src_dword,
-                                             dpp_ctrl, row_mask, bank_mask,
-                                             bound_ctrl);
-
-                       result = LLVMBuildInsertElement(ctx->builder, result,
-                                                       dpp_dword, idx, "");
-               }
-       }
-       return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
-}
-
-/* Emit one llvm.amdgcn.permlane16 call, or llvm.amdgcn.permlanex16 when
- * exchange_rows is set. src is passed for both of the first two operands
- * and the 64-bit selector is split into two i32 operands (low half first).
- */
-static LLVMValueRef
-_ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
-                    bool exchange_rows, bool bound_ctrl)
-{
-       const char *intrin = exchange_rows ? "llvm.amdgcn.permlanex16"
-                                          : "llvm.amdgcn.permlane16";
-       LLVMValueRef sel_lo = LLVMConstInt(ctx->i32, sel, false);
-       LLVMValueRef sel_hi = LLVMConstInt(ctx->i32, sel >> 32, false);
-       LLVMValueRef args[6] = {
-               src,
-               src,
-               sel_lo,
-               sel_hi,
-               ctx->i1true, /* fi */
-               bound_ctrl ? ctx->i1true : ctx->i1false,
-       };
-
-       return ac_build_intrinsic(ctx, intrin, ctx->i32, args, 6,
-                                 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* Apply permlane16/permlanex16 to src of any width that is a multiple of
- * 32 bits. Wider values are split into i32 pieces that are processed one by
- * one; the result is bitcast back to the original type of src.
- */
-static LLVMValueRef
-ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
-                   bool exchange_rows, bool bound_ctrl)
-{
-       LLVMTypeRef orig_type = LLVMTypeOf(src);
-       LLVMValueRef result;
-
-       src = ac_to_integer(ctx, src);
-       unsigned width = LLVMGetIntTypeWidth(LLVMTypeOf(src));
-
-       if (width == 32) {
-               result = _ac_build_permlane16(ctx, src, sel, exchange_rows,
-                                             bound_ctrl);
-       } else {
-               assert(width % 32 == 0);
-               const unsigned num_dwords = width / 32;
-               LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
-               LLVMValueRef dwords =
-                       LLVMBuildBitCast(ctx->builder, src, dwords_type, "");
-
-               result = LLVMGetUndef(dwords_type);
-               for (unsigned i = 0; i < num_dwords; i++) {
-                       LLVMValueRef idx = LLVMConstInt(ctx->i32, i, 0);
-                       LLVMValueRef dword =
-                               LLVMBuildExtractElement(ctx->builder, dwords,
-                                                       idx, "");
-                       LLVMValueRef perm_dword =
-                               _ac_build_permlane16(ctx, dword, sel,
-                                                    exchange_rows,
-                                                    bound_ctrl);
-
-                       result = LLVMBuildInsertElement(ctx->builder, result,
-                                                       perm_dword, idx, "");
-               }
-       }
-       return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
-}
-
-/* Pack three 5-bit masks into one word: and_mask in bits 0-4, or_mask in
- * bits 5-9 and xor_mask in bits 10-14.
- */
-static inline unsigned
-ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
-{
-       assert(and_mask < 32);
-       assert(or_mask < 32);
-       assert(xor_mask < 32);
-
-       unsigned pattern = and_mask;
-
-       pattern |= or_mask << 5;
-       pattern |= xor_mask << 10;
-       return pattern;
-}
-
-/* Emit one llvm.amdgcn.ds.swizzle call on src with the given mask, which is
- * passed as an i32 constant. The result has the type of src.
- */
-static LLVMValueRef
-_ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
-{
-       LLVMValueRef args[2] = {
-               src,
-               LLVMConstInt(ctx->i32, mask, 0),
-       };
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle",
-                                 LLVMTypeOf(src), args, 2,
-                                 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* Apply ds_swizzle to src of any width that is a multiple of 32 bits.
- * Wider values are split into i32 pieces that are swizzled one by one; the
- * result is bitcast back to the original type of src.
- */
-LLVMValueRef
-ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
-{
-       LLVMTypeRef orig_type = LLVMTypeOf(src);
-       LLVMValueRef result;
-
-       src = ac_to_integer(ctx, src);
-       unsigned width = LLVMGetIntTypeWidth(LLVMTypeOf(src));
-
-       if (width == 32) {
-               result = _ac_build_ds_swizzle(ctx, src, mask);
-       } else {
-               assert(width % 32 == 0);
-               const unsigned num_dwords = width / 32;
-               LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
-               LLVMValueRef dwords =
-                       LLVMBuildBitCast(ctx->builder, src, dwords_type, "");
-
-               result = LLVMGetUndef(dwords_type);
-               for (unsigned i = 0; i < num_dwords; i++) {
-                       LLVMValueRef idx = LLVMConstInt(ctx->i32, i, 0);
-                       LLVMValueRef dword =
-                               LLVMBuildExtractElement(ctx->builder, dwords,
-                                                       idx, "");
-                       LLVMValueRef swz_dword =
-                               _ac_build_ds_swizzle(ctx, dword, mask);
-
-                       result = LLVMBuildInsertElement(ctx->builder, result,
-                                                       swz_dword, idx, "");
-               }
-       }
-       return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
-}
-
-/* Wrap \p src in the llvm.amdgcn.wwm.<type> intrinsic, where <type> is the
- * name produced by ac_build_type_name_for_intr() for the type of \p src. */
-static LLVMValueRef
-ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
-{
-       LLVMTypeRef src_type = LLVMTypeOf(src);
-       LLVMValueRef wwm_args[1] = { src };
-       char type_suffix[8];
-       char intr_name[32];
-
-       ac_build_type_name_for_intr(src_type, type_suffix, sizeof(type_suffix));
-       snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.wwm.%s", type_suffix);
-
-       return ac_build_intrinsic(ctx, intr_name, src_type, wwm_args, 1,
-                                 AC_FUNC_ATTR_READNONE);
-}
-
-/* Call llvm.amdgcn.set.inactive.<type> with \p src and \p inactive.
- * Both operands are converted with ac_to_integer() first, and the result
- * is bitcast back to the original type of \p src. */
-static LLVMValueRef
-ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
-                     LLVMValueRef inactive)
-{
-       LLVMTypeRef orig_type = LLVMTypeOf(src);
-       char type_suffix[8];
-       char intr_name[33];
-
-       src = ac_to_integer(ctx, src);
-       inactive = ac_to_integer(ctx, inactive);
-
-       LLVMTypeRef int_type = LLVMTypeOf(src);
-       ac_build_type_name_for_intr(int_type, type_suffix, sizeof(type_suffix));
-       snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.set.inactive.%s", type_suffix);
-
-       LLVMValueRef intr_args[2] = { src, inactive };
-       LLVMValueRef as_int =
-               ac_build_intrinsic(ctx, intr_name, int_type, intr_args, 2,
-                                  AC_FUNC_ATTR_READNONE |
-                                  AC_FUNC_ATTR_CONVERGENT);
-
-       return LLVMBuildBitCast(ctx->builder, as_int, orig_type, "");
-}
-
-/**
- * Return the identity constant of reduction operation \p op.
- *
- * \param type_size compared against 4: a value of 4 selects the 32-bit
- *                  constants, every other value selects the 64-bit ones.
- *                  The callers in this file pass ac_get_type_size().
- */
-static LLVMValueRef
-get_reduction_identity(struct ac_llvm_context *ctx, nir_op op, unsigned type_size)
-{
-       const bool is_32bit = type_size == 4;
-
-       switch (op) {
-       case nir_op_iadd:
-               return is_32bit ? ctx->i32_0 : ctx->i64_0;
-       case nir_op_fadd:
-               return is_32bit ? ctx->f32_0 : ctx->f64_0;
-       case nir_op_imul:
-               return is_32bit ? ctx->i32_1 : ctx->i64_1;
-       case nir_op_fmul:
-               return is_32bit ? ctx->f32_1 : ctx->f64_1;
-       case nir_op_imin:
-               return is_32bit ? LLVMConstInt(ctx->i32, INT32_MAX, 0)
-                               : LLVMConstInt(ctx->i64, INT64_MAX, 0);
-       case nir_op_umin:
-               return is_32bit ? LLVMConstInt(ctx->i32, UINT32_MAX, 0)
-                               : LLVMConstInt(ctx->i64, UINT64_MAX, 0);
-       case nir_op_fmin:
-               return is_32bit ? LLVMConstReal(ctx->f32, INFINITY)
-                               : LLVMConstReal(ctx->f64, INFINITY);
-       case nir_op_imax:
-               return is_32bit ? LLVMConstInt(ctx->i32, INT32_MIN, 0)
-                               : LLVMConstInt(ctx->i64, INT64_MIN, 0);
-       case nir_op_umax:
-               return is_32bit ? ctx->i32_0 : ctx->i64_0;
-       case nir_op_fmax:
-               return is_32bit ? LLVMConstReal(ctx->f32, -INFINITY)
-                               : LLVMConstReal(ctx->f64, -INFINITY);
-       case nir_op_iand:
-               return is_32bit ? LLVMConstInt(ctx->i32, -1, 0)
-                               : LLVMConstInt(ctx->i64, -1, 0);
-       case nir_op_ior:
-               return is_32bit ? ctx->i32_0 : ctx->i64_0;
-       case nir_op_ixor:
-               return is_32bit ? ctx->i32_0 : ctx->i64_0;
-       default:
-               unreachable("bad reduction intrinsic");
-       }
-}
-
-/* Build the binary operation \p op on \p lhs and \p rhs.
- * Integer min/max are emitted as an icmp followed by a select; float
- * min/max use llvm.minnum / llvm.maxnum, in the f64 variant when
- * ac_get_type_size() of \p lhs is 8 and the f32 variant otherwise. */
-static LLVMValueRef
-ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, nir_op op)
-{
-       const bool is_f64 = ac_get_type_size(LLVMTypeOf(lhs)) == 8;
-       LLVMBuilderRef b = ctx->builder;
-       LLVMValueRef operands[2] = { lhs, rhs };
-       LLVMIntPredicate pred;
-
-       switch (op) {
-       case nir_op_iadd:
-               return LLVMBuildAdd(b, lhs, rhs, "");
-       case nir_op_fadd:
-               return LLVMBuildFAdd(b, lhs, rhs, "");
-       case nir_op_imul:
-               return LLVMBuildMul(b, lhs, rhs, "");
-       case nir_op_fmul:
-               return LLVMBuildFMul(b, lhs, rhs, "");
-       case nir_op_iand:
-               return LLVMBuildAnd(b, lhs, rhs, "");
-       case nir_op_ior:
-               return LLVMBuildOr(b, lhs, rhs, "");
-       case nir_op_ixor:
-               return LLVMBuildXor(b, lhs, rhs, "");
-       case nir_op_fmin:
-               return ac_build_intrinsic(ctx,
-                                         is_f64 ? "llvm.minnum.f64" : "llvm.minnum.f32",
-                                         is_f64 ? ctx->f64 : ctx->f32,
-                                         operands, 2, AC_FUNC_ATTR_READNONE);
-       case nir_op_fmax:
-               return ac_build_intrinsic(ctx,
-                                         is_f64 ? "llvm.maxnum.f64" : "llvm.maxnum.f32",
-                                         is_f64 ? ctx->f64 : ctx->f32,
-                                         operands, 2, AC_FUNC_ATTR_READNONE);
-       case nir_op_imin:
-               pred = LLVMIntSLT;
-               break;
-       case nir_op_umin:
-               pred = LLVMIntULT;
-               break;
-       case nir_op_imax:
-               pred = LLVMIntSGT;
-               break;
-       case nir_op_umax:
-               pred = LLVMIntUGT;
-               break;
-       default:
-               unreachable("bad reduction intrinsic");
-       }
-
-       /* Integer min/max: keep lhs when the comparison holds, else rhs. */
-       LLVMValueRef keep_lhs = LLVMBuildICmp(b, pred, lhs, rhs, "");
-       return LLVMBuildSelect(b, keep_lhs, lhs, rhs, "");
-}
-
-/**
- * Build a prefix scan of \p src with operation \p op.
- *
- * \param identity value handed to ac_build_dpp() as its first value operand
- *     and used as the starting result of exclusive scans on GFX10+
- * \param maxprefix specifies that the result only needs to be correct for a
- *     prefix of this many threads
- * \param inclusive selects an inclusive (true) or exclusive (false) scan
- *
- * TODO: add inclusive and exclusive scan functions for GFX6.
- */
-static LLVMValueRef
-ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity,
-             unsigned maxprefix, bool inclusive)
-{
-       LLVMValueRef result, tmp;
-
-       if (ctx->chip_class >= GFX10) {
-               /* src stays unshifted; an exclusive scan starts from identity. */
-               result = inclusive ? src : identity;
-       } else {
-               /* NOTE(review): dpp_wf_sr1 presumably shifts the whole wave right
-                * by one lane, turning the exclusive scan into an inclusive scan
-                * of the shifted value - confirm. */
-               if (!inclusive)
-                       src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
-               result = src;
-       }
-       /* Each step below combines more lanes; return as soon as the requested
-        * prefix length is covered. */
-       if (maxprefix <= 1)
-               return result;
-       tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       if (maxprefix <= 2)
-               return result;
-       tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       if (maxprefix <= 3)
-               return result;
-       tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       if (maxprefix <= 4)
-               return result;
-       /* From here on the DPP source is the accumulated result, not src. */
-       tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       if (maxprefix <= 8)
-               return result;
-       tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       if (maxprefix <= 16)
-               return result;
-
-       if (ctx->chip_class >= GFX10) {
-               /* dpp_row_bcast{15,31} are not supported on gfx10. */
-               LLVMBuilderRef builder = ctx->builder;
-               LLVMValueRef tid = ac_get_thread_id(ctx);
-               LLVMValueRef cc;
-               /* TODO-GFX10: Can we get better code-gen by putting this into
-                * a branch so that LLVM generates EXEC mask manipulations? */
-               if (inclusive)
-                       tmp = result;
-               else
-                       tmp = ac_build_alu_op(ctx, result, src, op);
-               tmp = ac_build_permlane16(ctx, tmp, ~(uint64_t)0, true, false);
-               tmp = ac_build_alu_op(ctx, result, tmp, op);
-               /* Only lanes whose thread id has bit 4 set take the combined value. */
-               cc = LLVMBuildAnd(builder, tid, LLVMConstInt(ctx->i32, 16, false), "");
-               cc = LLVMBuildICmp(builder, LLVMIntNE, cc, ctx->i32_0, "");
-               result = LLVMBuildSelect(builder, cc, tmp, result, "");
-               if (maxprefix <= 32)
-                       return result;
-
-               if (inclusive)
-                       tmp = result;
-               else
-                       tmp = ac_build_alu_op(ctx, result, src, op);
-               /* Read lane 31 and fold it into lanes with thread id >= 32. */
-               tmp = ac_build_readlane(ctx, tmp, LLVMConstInt(ctx->i32, 31, false));
-               tmp = ac_build_alu_op(ctx, result, tmp, op);
-               cc = LLVMBuildICmp(builder, LLVMIntUGE, tid,
-                                  LLVMConstInt(ctx->i32, 32, false), "");
-               result = LLVMBuildSelect(builder, cc, tmp, result, "");
-               return result;
-       }
-
-       /* Pre-GFX10: finish with the row broadcast DPP controls. */
-       tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       if (maxprefix <= 32)
-               return result;
-       tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       return result;
-}
-
-/**
- * Inclusive scan of \p src with \p op, sized by ctx->wave_size.
- *
- * An i1 source combined with nir_op_iadd takes a shortcut: the value is
- * zero-extended to i32 and the result is ac_build_mbcnt() of its ballot
- * plus the zero-extended value itself. Every other case goes through
- * ac_build_set_inactive(), ac_build_scan() and ac_build_wwm().
- */
-LLVMValueRef
-ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
-{
-       const bool is_bool_add = LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd;
-
-       if (is_bool_add) {
-               LLVMValueRef as_i32 = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
-               LLVMValueRef ballot = ac_build_ballot(ctx, as_i32);
-               LLVMValueRef count = ac_build_mbcnt(ctx, ballot);
-
-               return LLVMBuildAdd(ctx->builder, count, as_i32, "");
-       }
-
-       ac_build_optimization_barrier(ctx, &src);
-
-       unsigned type_size = ac_get_type_size(LLVMTypeOf(src));
-       LLVMValueRef identity = get_reduction_identity(ctx, op, type_size);
-
-       /* Give the inactive lanes the identity, then view the value in the
-        * identity's type for the scan. */
-       LLVMValueRef filled = ac_build_set_inactive(ctx, src, identity);
-       LLVMValueRef scan_src = LLVMBuildBitCast(ctx->builder, filled,
-                                                LLVMTypeOf(identity), "");
-       LLVMValueRef scanned = ac_build_scan(ctx, op, scan_src, identity,
-                                            ctx->wave_size, true);
-
-       return ac_build_wwm(ctx, scanned);
-}
-
-/**
- * Exclusive scan of \p src with \p op, sized by ctx->wave_size.
- *
- * An i1 source combined with nir_op_iadd takes a shortcut: the result is
- * ac_build_mbcnt() of the ballot of the zero-extended value. Every other
- * case goes through ac_build_set_inactive(), ac_build_scan() and
- * ac_build_wwm().
- */
-LLVMValueRef
-ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
-{
-       const bool is_bool_add = LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd;
-
-       if (is_bool_add) {
-               LLVMValueRef as_i32 = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
-               LLVMValueRef ballot = ac_build_ballot(ctx, as_i32);
-
-               return ac_build_mbcnt(ctx, ballot);
-       }
-
-       ac_build_optimization_barrier(ctx, &src);
-
-       unsigned type_size = ac_get_type_size(LLVMTypeOf(src));
-       LLVMValueRef identity = get_reduction_identity(ctx, op, type_size);
-
-       /* Give the inactive lanes the identity, then view the value in the
-        * identity's type for the scan. */
-       LLVMValueRef filled = ac_build_set_inactive(ctx, src, identity);
-       LLVMValueRef scan_src = LLVMBuildBitCast(ctx->builder, filled,
-                                                LLVMTypeOf(identity), "");
-       LLVMValueRef scanned = ac_build_scan(ctx, op, scan_src, identity,
-                                            ctx->wave_size, false);
-
-       return ac_build_wwm(ctx, scanned);
-}
-
-/**
- * Build a reduction of \p src with operation \p op.
- *
- * \param cluster_size 1 returns \p src untouched; 2, 4, 8, 16 and 32 return
- *     after the matching combine step; any other value runs the final
- *     step at the bottom of the function.
- *
- * Every path except cluster_size == 1 returns through ac_build_wwm().
- */
-LLVMValueRef
-ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size)
-{
-       if (cluster_size == 1) return src;
-       ac_build_optimization_barrier(ctx, &src);
-       LLVMValueRef result, swap;
-       LLVMValueRef identity = get_reduction_identity(ctx, op,
-                                                               ac_get_type_size(LLVMTypeOf(src)));
-       /* Inactive lanes get the identity; the value is then viewed in the
-        * identity's type. */
-       result = LLVMBuildBitCast(ctx->builder,
-                                                               ac_build_set_inactive(ctx, src, identity),
-                                                               LLVMTypeOf(identity), "");
-       /* Swap neighbouring lanes inside each quad and combine. */
-       swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2);
-       result = ac_build_alu_op(ctx, result, swap, op);
-       if (cluster_size == 2) return ac_build_wwm(ctx, result);
-
-       /* Swap the two halves of each quad and combine. */
-       swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1);
-       result = ac_build_alu_op(ctx, result, swap, op);
-       if (cluster_size == 4) return ac_build_wwm(ctx, result);
-
-       /* From here GFX8+ uses DPP while older chips fall back to ds_swizzle. */
-       if (ctx->chip_class >= GFX8)
-               swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
-       else
-               swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
-       result = ac_build_alu_op(ctx, result, swap, op);
-       if (cluster_size == 8) return ac_build_wwm(ctx, result);
-
-       if (ctx->chip_class >= GFX8)
-               swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
-       else
-               swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
-       result = ac_build_alu_op(ctx, result, swap, op);
-       if (cluster_size == 16) return ac_build_wwm(ctx, result);
-
-       /* GFX10+ uses permlane16; GFX8/9 use dpp_row_bcast15 unless the cluster
-        * size is exactly 32, in which case (and on older chips) ds_swizzle is
-        * used. */
-       if (ctx->chip_class >= GFX10)
-               swap = ac_build_permlane16(ctx, result, 0, true, false);
-       else if (ctx->chip_class >= GFX8 && cluster_size != 32)
-               swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
-       else
-               swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
-       result = ac_build_alu_op(ctx, result, swap, op);
-       if (cluster_size == 32) return ac_build_wwm(ctx, result);
-
-       /* Remaining cluster sizes: one last combine step. */
-       if (ctx->chip_class >= GFX8) {
-               if (ctx->chip_class >= GFX10)
-                       swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
-               else
-                       swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
-               result = ac_build_alu_op(ctx, result, swap, op);
-               /* The final value is read back from lane 63. */
-               result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
-               return ac_build_wwm(ctx, result);
-       } else {
-               /* Combine the values read from lane 0 and lane 32. */
-               swap = ac_build_readlane(ctx, result, ctx->i32_0);
-               result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0));
-               result = ac_build_alu_op(ctx, result, swap, op);
-               return ac_build_wwm(ctx, result);
-       }
-}
-
-/**
- * First half of a workgroup-wide scan over per-wave values.
- *
- * The lane whose thread id equals wave_size - 1 stores ws->src into
- * ws->scratch at index ws->waveidx. Nothing is emitted when
- * ws->maxwaves <= 1.
- *
- * The source value must be present in the highest lane of the wave, and the
- * highest lane must be live.
- */
-void
-ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
-       LLVMBuilderRef b = ctx->builder;
-
-       if (ws->maxwaves > 1) {
-               const LLVMValueRef top_lane =
-                       LLVMConstInt(ctx->i32, ctx->wave_size - 1, false);
-               LLVMValueRef lane_id = ac_get_thread_id(ctx);
-               LLVMValueRef is_top_lane =
-                       LLVMBuildICmp(b, LLVMIntEQ, lane_id, top_lane, "");
-
-               ac_build_ifcc(ctx, is_top_lane, 1000);
-               LLVMValueRef slot = LLVMBuildGEP(b, ws->scratch, &ws->waveidx, 1, "");
-               LLVMBuildStore(b, ws->src, slot);
-               ac_build_endif(ctx, 1000);
-       }
-}
-
-/**
- * "Bottom half" of a scan that reduces per-wave values across an entire
- * workgroup.
- *
- * Loads the per-wave values from ws->scratch, scans them, and fills in
- * ws->result_reduce / ws->result_inclusive / ws->result_exclusive for
- * whichever of ws->enable_reduce / enable_inclusive / enable_exclusive is set.
- *
- * The caller must place a barrier between the top and bottom halves.
- */
-void
-ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
-       const LLVMTypeRef type = LLVMTypeOf(ws->src);
-       const LLVMValueRef identity =
-               get_reduction_identity(ctx, ws->op, ac_get_type_size(type));
-
-       /* With at most one wave no scratch traffic is needed; all three results
-        * are written regardless of the enable flags. */
-       if (ws->maxwaves <= 1) {
-               ws->result_reduce = ws->src;
-               ws->result_inclusive = ws->src;
-               ws->result_exclusive = identity;
-               return;
-       }
-       assert(ws->maxwaves <= 32);
-
-       LLVMBuilderRef builder = ctx->builder;
-       LLVMValueRef tid = ac_get_thread_id(ctx);
-       LLVMBasicBlockRef bbs[2];
-       LLVMValueRef phivalues_scan[2];
-       LLVMValueRef tmp, tmp2;
-
-       /* Incoming phi value for lanes that skip the conditional block. */
-       bbs[0] = LLVMGetInsertBlock(builder);
-       phivalues_scan[0] = LLVMGetUndef(type);
-
-       /* Choose which lanes load a per-wave value: all lanes below numwaves
-        * for a reduction, otherwise lanes up to (inclusive) or below
-        * (exclusive only) this wave's index. */
-       if (ws->enable_reduce)
-               tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, "");
-       else if (ws->enable_inclusive)
-               tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, "");
-       else
-               tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, "");
-       ac_build_ifcc(ctx, tmp, 1001);
-       {
-               /* Each participating lane loads the scratch slot indexed by its
-                * thread id. */
-               tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), "");
-
-               ac_build_optimization_barrier(ctx, &tmp);
-
-               bbs[1] = LLVMGetInsertBlock(builder);
-               phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true);
-       }
-       ac_build_endif(ctx, 1001);
-
-       const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs);
-
-       if (ws->enable_reduce) {
-               /* The reduction is the scan value held by lane numwaves - 1. */
-               tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, "");
-               ws->result_reduce = ac_build_readlane(ctx, scan, tmp);
-       }
-       if (ws->enable_inclusive)
-               ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx);
-       if (ws->enable_exclusive) {
-               /* Read lane waveidx - 1, but use the identity when waveidx is 0. */
-               tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, "");
-               tmp = ac_build_readlane(ctx, scan, tmp);
-               tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, "");
-               ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, "");
-       }
-}
-
-/**
- * Scan a per-wave value across the whole workgroup: runs the top half,
- * an s_barrier, then the bottom half.
- *
- * Unlike ac_build_inclusive_scan, the caller \em must ensure that all threads
- * of the workgroup are live. (This requirement cannot easily be relaxed in a
- * useful manner because of the barrier in the algorithm.)
- */
-void
-ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
-       /* Publish this wave's value to scratch. */
-       ac_build_wg_wavescan_top(ctx, ws);
-
-       /* Barrier between the two halves, as the bottom half requires. */
-       ac_build_s_barrier(ctx);
-
-       /* Read the per-wave values back and scan them. */
-       ac_build_wg_wavescan_bottom(ctx, ws);
-}
-
-/**
- * First half of a workgroup-wide scan over per-thread values.
- *
- * Runs the per-wave scan, replaces ws->src with its inclusive result (and,
- * for exclusive scans, keeps the exclusive result in ws->extra), then runs
- * the wave-level top half with the enable flags temporarily remapped.
- *
- * All lanes must be active when this code runs.
- */
-void
-ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
-       if (!ws->enable_exclusive) {
-               ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op);
-       } else {
-               ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op);
-
-               /* The i1 + iadd exclusive scan is built on an i32 zext of the
-                * source, so widen the source the same way before combining. */
-               const bool is_bool_add =
-                       LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd;
-               if (is_bool_add)
-                       ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, "");
-
-               ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op);
-       }
-
-       /* Remap the flags for the wave-level pass, then restore them. */
-       const bool saved_inclusive = ws->enable_inclusive;
-       const bool saved_exclusive = ws->enable_exclusive;
-
-       ws->enable_inclusive = false;
-       ws->enable_exclusive = saved_exclusive || saved_inclusive;
-       ac_build_wg_wavescan_top(ctx, ws);
-       ws->enable_inclusive = saved_inclusive;
-       ws->enable_exclusive = saved_exclusive;
-}
-
-/**
- * Second half of a workgroup-wide scan over per-thread values.
- *
- * Runs the wave-level bottom half with the enable flags temporarily
- * remapped, then folds ws->src into the inclusive result and ws->extra
- * into the exclusive result.
- *
- * The caller must place a barrier between the top and bottom halves.
- */
-void
-ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
-       const bool saved_inclusive = ws->enable_inclusive;
-       const bool saved_exclusive = ws->enable_exclusive;
-
-       /* Remap the flags for the wave-level pass, then restore them. */
-       ws->enable_inclusive = false;
-       ws->enable_exclusive = saved_exclusive || saved_inclusive;
-       ac_build_wg_wavescan_bottom(ctx, ws);
-       ws->enable_inclusive = saved_inclusive;
-       ws->enable_exclusive = saved_exclusive;
-
-       /* ws->result_reduce needs no further adjustment. */
-       if (saved_inclusive) {
-               ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive,
-                                                      ws->src, ws->op);
-       }
-       if (saved_exclusive) {
-               ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive,
-                                                      ws->extra, ws->op);
-       }
-}
-
-/**
- * Scan per-thread values across the whole workgroup: runs the top half,
- * an s_barrier, then the bottom half.
- *
- * The caller must ensure that all lanes are active when this code runs
- * (WWM is insufficient!), because there is an implied barrier.
- */
-void
-ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
-       /* Per-wave scan plus publishing of the wave totals. */
-       ac_build_wg_scan_top(ctx, ws);
-
-       /* Barrier between the two halves, as the bottom half requires. */
-       ac_build_s_barrier(ctx);
-
-       /* Cross-wave scan and final combination. */
-       ac_build_wg_scan_bottom(ctx, ws);
-}
-
-/**
- * Permute \p src using the pattern dpp_quad_perm(lane0..lane3).
- *
- * GFX8 and newer use ac_build_dpp(); older chips use ac_build_ds_swizzle()
- * with bit 15 set on top of the same pattern.
- */
-LLVMValueRef
-ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
-               unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
-{
-       const unsigned quad_perm = dpp_quad_perm(lane0, lane1, lane2, lane3);
-
-       if (ctx->chip_class < GFX8)
-               return ac_build_ds_swizzle(ctx, src, (1 << 15) | quad_perm);
-
-       return ac_build_dpp(ctx, src, src, quad_perm, 0xf, 0xf, false);
-}
-
-/**
- * Emit llvm.amdgcn.ds.bpermute on \p src with \p index multiplied by 4.
- * The call is declared as returning i32.
- *
- * NOTE(review): the multiply by 4 presumably turns a lane index into the
- * byte offset that the intrinsic expects - confirm.
- */
-LLVMValueRef
-ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index)
-{
-       LLVMValueRef four = LLVMConstInt(ctx->i32, 4, 0);
-       LLVMValueRef scaled_index = LLVMBuildMul(ctx->builder, index, four, "");
-       LLVMValueRef bpermute_args[2] = { scaled_index, src };
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32,
-                                 bpermute_args, 2,
-                                 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-/**
- * Emit the llvm.amdgcn.frexp.exp intrinsic for \p src0.
- *
- * \param bitsize 16 selects the i16.f16 variant and 32 the i32.f32 variant;
- *                every other value selects the i32.f64 variant.
- * \return the intrinsic call, typed i16 for bitsize 16 and i32 otherwise.
- */
-LLVMValueRef
-ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                  unsigned bitsize)
-{
-       /* Only ever points at string literals, hence const. */
-       const char *intr;
-       LLVMTypeRef type;
-
-       switch (bitsize) {
-       case 16:
-               intr = "llvm.amdgcn.frexp.exp.i16.f16";
-               type = ctx->i16;
-               break;
-       case 32:
-               intr = "llvm.amdgcn.frexp.exp.i32.f32";
-               type = ctx->i32;
-               break;
-       default:
-               intr = "llvm.amdgcn.frexp.exp.i32.f64";
-               type = ctx->i32;
-               break;
-       }
-
-       LLVMValueRef params[] = {
-               src0,
-       };
-       return ac_build_intrinsic(ctx, intr, type, params, 1,
-                                 AC_FUNC_ATTR_READNONE);
-}
-/**
- * Emit the llvm.amdgcn.frexp.mant intrinsic for \p src0.
- *
- * \param bitsize 16 selects the f16 variant and 32 the f32 variant; every
- *                other value selects the f64 variant.
- * \return the intrinsic call, typed f16, f32 or f64 to match the variant.
- */
-LLVMValueRef
-ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                   unsigned bitsize)
-{
-       /* Only ever points at string literals, hence const. */
-       const char *intr;
-       LLVMTypeRef type;
-
-       switch (bitsize) {
-       case 16:
-               intr = "llvm.amdgcn.frexp.mant.f16";
-               type = ctx->f16;
-               break;
-       case 32:
-               intr = "llvm.amdgcn.frexp.mant.f32";
-               type = ctx->f32;
-               break;
-       default:
-               intr = "llvm.amdgcn.frexp.mant.f64";
-               type = ctx->f64;
-               break;
-       }
-
-       LLVMValueRef params[] = {
-               src0,
-       };
-       return ac_build_intrinsic(ctx, intr, type, params, 1,
-                                 AC_FUNC_ATTR_READNONE);
-}
-
-/*
- * Takes an (I, J) coordinate pair and builds its X and Y derivatives.
- * The gathered result is ordered DDX(I), DDX(J), DDY(I), DDY(J).
- */
-LLVMValueRef
-ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij)
-{
-       LLVMValueRef derivs[4];
-
-       for (unsigned chan = 0; chan < 2; chan++) {
-               LLVMValueRef chan_idx = LLVMConstInt(ctx->i32, chan, false);
-               LLVMValueRef coord = LLVMBuildExtractElement(ctx->builder, interp_ij,
-                                                            chan_idx, "");
-
-               /* Slots 0-1 hold the DDX values, slots 2-3 the DDY values. */
-               derivs[chan] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, coord);
-               derivs[chan + 2] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, coord);
-       }
-
-       return ac_build_gather_values(ctx, derivs, 4);
-}
-
-/**
- * Return the negation of llvm.amdgcn.ps.live, sign-extended to i32:
- * all ones when ps.live is false and 0 when it is true.
- */
-LLVMValueRef
-ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
-{
-       LLVMValueRef is_live = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
-                                                 ctx->i1, NULL, 0,
-                                                 AC_FUNC_ATTR_READNONE);
-       LLVMValueRef is_helper = LLVMBuildNot(ctx->builder, is_live, "");
-
-       return LLVMBuildSExt(ctx->builder, is_helper, ctx->i32, "");
-}
-
-/* Build a call to \p func with \p num_args arguments from \p args and give
- * the call instruction the calling convention of \p func. */
-LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
-                          LLVMValueRef *args, unsigned num_args)
-{
-       LLVMValueRef call_inst =
-               LLVMBuildCall(ctx->builder, func, args, num_args, "");
-       unsigned callee_cc = LLVMGetFunctionCallConv(func);
-
-       LLVMSetInstructionCallConv(call_inst, callee_cc);
-       return call_inst;
-}
-
-/**
- * Fill \p args with an MRTZ export of \p depth, \p stencil and
- * \p samplemask.
- *
- * Any of the three values may be NULL, but at least one must be present.
- * \p args is zeroed first. The export format returned by
- * ac_get_spi_shader_z_format() decides between the compressed
- * UINT16_ABGR layout and one value per channel.
- */
-void
-ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
-               LLVMValueRef stencil, LLVMValueRef samplemask,
-               struct ac_export_args *args)
-{
-       const bool has_depth = depth != NULL;
-       const bool has_stencil = stencil != NULL;
-       const bool has_samplemask = samplemask != NULL;
-       unsigned z_format = ac_get_spi_shader_z_format(has_depth, has_stencil,
-                                                      has_samplemask);
-       unsigned channels = 0;
-
-       assert(has_depth || has_stencil || has_samplemask);
-
-       memset(args, 0, sizeof(*args));
-
-       args->valid_mask = 1; /* whether the EXEC mask is valid */
-       args->done = 1; /* DONE bit */
-       args->target = V_008DFC_SQ_EXP_MRTZ; /* export target */
-       args->compr = 0; /* COMPR flag, set below for the 16-bit format */
-
-       /* out[0] = R (depth), out[1] = G (stencil test val[0:7], stencil op
-        * val[8:15]), out[2] = B (sample mask), out[3] = A (alpha to mask).
-        * All start undefined. */
-       for (unsigned chan = 0; chan < 4; chan++)
-               args->out[chan] = LLVMGetUndef(ctx->f32);
-
-       if (z_format != V_028710_SPI_SHADER_UINT16_ABGR) {
-               /* One value per 32-bit channel. */
-               if (has_depth) {
-                       args->out[0] = depth;
-                       channels |= 0x1;
-               }
-               if (has_stencil) {
-                       args->out[1] = stencil;
-                       channels |= 0x2;
-               }
-               if (has_samplemask) {
-                       args->out[2] = samplemask;
-                       channels |= 0x4;
-               }
-       } else {
-               assert(!depth);
-               args->compr = 1; /* COMPR flag */
-
-               if (has_stencil) {
-                       /* Stencil should be in X[23:16]. */
-                       LLVMValueRef stencil_int = ac_to_integer(ctx, stencil);
-                       LLVMValueRef shift = LLVMConstInt(ctx->i32, 16, 0);
-
-                       stencil_int = LLVMBuildShl(ctx->builder, stencil_int, shift, "");
-                       args->out[0] = ac_to_float(ctx, stencil_int);
-                       channels |= 0x3;
-               }
-               if (has_samplemask) {
-                       /* SampleMask should be in Y[15:0]. */
-                       args->out[1] = samplemask;
-                       channels |= 0xc;
-               }
-       }
-
-       /* GFX6 (except OLAND and HAINAN) has a bug that it only looks
-        * at the X writemask component. */
-       const bool is_oland_or_hainan =
-               ctx->family == CHIP_OLAND || ctx->family == CHIP_HAINAN;
-       if (ctx->chip_class == GFX6 && !is_oland_or_hainan)
-               channels |= 0x1;
-
-       /* Specify which components to enable */
-       args->enabled_channels = channels;
-}
-
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
deleted file mode 100644 (file)
index 013bf00..0000000
+++ /dev/null
@@ -1,744 +0,0 @@
-/*
- * Copyright 2016 Bas Nieuwenhuizen
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-#ifndef AC_LLVM_BUILD_H
-#define AC_LLVM_BUILD_H
-
-#include <stdbool.h>
-#include <llvm-c/Core.h>
-#include "compiler/nir/nir.h"
-#include "amd_family.h"
-#include "ac_shader_util.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum {
-       AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */
-       AC_ADDR_SPACE_GLOBAL = 1,
-       AC_ADDR_SPACE_GDS = 2,
-       AC_ADDR_SPACE_LDS = 3,
-       AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */
-       AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
-};
-
-#define AC_WAIT_LGKM   (1 << 0) /* LDS, GDS, constant, message */
-#define AC_WAIT_VLOAD  (1 << 1) /* VMEM load/sample instructions */
-#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */
-
-struct ac_llvm_flow;
-struct ac_llvm_compiler;
-enum ac_float_mode;
-
-struct ac_llvm_flow_state {
-       struct ac_llvm_flow *stack;
-       unsigned depth_max;
-       unsigned depth;
-};
-
-struct ac_llvm_context {
-       LLVMContextRef context;
-       LLVMModuleRef module;
-       LLVMBuilderRef builder;
-
-       LLVMTypeRef voidt;
-       LLVMTypeRef i1;
-       LLVMTypeRef i8;
-       LLVMTypeRef i16;
-       LLVMTypeRef i32;
-       LLVMTypeRef i64;
-       LLVMTypeRef intptr;
-       LLVMTypeRef f16;
-       LLVMTypeRef f32;
-       LLVMTypeRef f64;
-       LLVMTypeRef v2i16;
-       LLVMTypeRef v2i32;
-       LLVMTypeRef v3i32;
-       LLVMTypeRef v4i32;
-       LLVMTypeRef v2f32;
-       LLVMTypeRef v3f32;
-       LLVMTypeRef v4f32;
-       LLVMTypeRef v8i32;
-       LLVMTypeRef iN_wavemask;
-       LLVMTypeRef iN_ballotmask;
-
-       LLVMValueRef i8_0;
-       LLVMValueRef i8_1;
-       LLVMValueRef i16_0;
-       LLVMValueRef i16_1;
-       LLVMValueRef i32_0;
-       LLVMValueRef i32_1;
-       LLVMValueRef i64_0;
-       LLVMValueRef i64_1;
-       LLVMValueRef f16_0;
-       LLVMValueRef f16_1;
-       LLVMValueRef f32_0;
-       LLVMValueRef f32_1;
-       LLVMValueRef f64_0;
-       LLVMValueRef f64_1;
-       LLVMValueRef i1true;
-       LLVMValueRef i1false;
-
-       /* Since ac_nir_translate makes a local copy of ac_llvm_context, there
-        * are two ac_llvm_contexts. Declare a pointer here, so that the control
-        * flow stack is shared by both ac_llvm_contexts.
-        */
-       struct ac_llvm_flow_state *flow;
-
-       unsigned range_md_kind;
-       unsigned invariant_load_md_kind;
-       unsigned uniform_md_kind;
-       unsigned fpmath_md_kind;
-       LLVMValueRef fpmath_md_2p5_ulp;
-       LLVMValueRef empty_md;
-
-       enum chip_class chip_class;
-       enum radeon_family family;
-
-       unsigned wave_size;
-       unsigned ballot_mask_bits;
-
-       LLVMValueRef lds;
-};
-
-void
-ac_llvm_context_init(struct ac_llvm_context *ctx,
-                    struct ac_llvm_compiler *compiler,
-                    enum chip_class chip_class, enum radeon_family family,
-                    enum ac_float_mode float_mode, unsigned wave_size,
-                    unsigned ballot_mask_bits);
-
-void
-ac_llvm_context_dispose(struct ac_llvm_context *ctx);
-
-int
-ac_get_llvm_num_components(LLVMValueRef value);
-
-int
-ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
-
-LLVMValueRef
-ac_llvm_extract_elem(struct ac_llvm_context *ac,
-                    LLVMValueRef value,
-                    int index);
-
-unsigned ac_get_type_size(LLVMTypeRef type);
-
-LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
-LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
-LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
-LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
-LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
-
-LLVMValueRef
-ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
-                  LLVMTypeRef return_type, LLVMValueRef *params,
-                  unsigned param_count, unsigned attrib_mask);
-
-void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize);
-
-LLVMValueRef
-ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
-            unsigned count_incoming, LLVMValueRef *values,
-            LLVMBasicBlockRef *blocks);
-
-void ac_build_s_barrier(struct ac_llvm_context *ctx);
-void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
-                                  LLVMValueRef *pvgpr);
-
-LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx);
-
-LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
-LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
-                                LLVMValueRef value);
-
-LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value);
-
-LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value);
-
-LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value);
-
-LLVMValueRef
-ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
-                              unsigned value_count, unsigned component);
-
-LLVMValueRef
-ac_build_gather_values_extended(struct ac_llvm_context *ctx,
-                               LLVMValueRef *values,
-                               unsigned value_count,
-                               unsigned value_stride,
-                               bool load,
-                               bool always_vector);
-LLVMValueRef
-ac_build_gather_values(struct ac_llvm_context *ctx,
-                      LLVMValueRef *values,
-                      unsigned value_count);
-
-LLVMValueRef
-ac_extract_components(struct ac_llvm_context *ctx,
-                     LLVMValueRef value,
-                     unsigned start,
-                     unsigned channels);
-
-LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
-                                    LLVMValueRef value,
-                                    unsigned num_channels);
-LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
-
-LLVMValueRef
-ac_build_fdiv(struct ac_llvm_context *ctx,
-             LLVMValueRef num,
-             LLVMValueRef den);
-
-LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
-                               LLVMValueRef num,
-                               LLVMValueRef multiplier,
-                               LLVMValueRef pre_shift,
-                               LLVMValueRef post_shift,
-                               LLVMValueRef increment);
-LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
-                                   LLVMValueRef num,
-                                   LLVMValueRef multiplier,
-                                   LLVMValueRef pre_shift,
-                                   LLVMValueRef post_shift,
-                                   LLVMValueRef increment);
-LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
-                                             LLVMValueRef num,
-                                             LLVMValueRef multiplier,
-                                             LLVMValueRef post_shift);
-
-void
-ac_prepare_cube_coords(struct ac_llvm_context *ctx,
-                      bool is_deriv, bool is_array, bool is_lod,
-                      LLVMValueRef *coords_arg,
-                      LLVMValueRef *derivs_arg);
-
-
-LLVMValueRef
-ac_build_fs_interp(struct ac_llvm_context *ctx,
-                  LLVMValueRef llvm_chan,
-                  LLVMValueRef attr_number,
-                  LLVMValueRef params,
-                  LLVMValueRef i,
-                  LLVMValueRef j);
-
-LLVMValueRef
-ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
-                      LLVMValueRef llvm_chan,
-                      LLVMValueRef attr_number,
-                      LLVMValueRef params,
-                      LLVMValueRef i,
-                      LLVMValueRef j);
-
-LLVMValueRef
-ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
-                      LLVMValueRef parameter,
-                      LLVMValueRef llvm_chan,
-                      LLVMValueRef attr_number,
-                      LLVMValueRef params);
-
-LLVMValueRef
-ac_build_gep_ptr(struct ac_llvm_context *ctx,
-                LLVMValueRef base_ptr,
-                LLVMValueRef index);
-
-LLVMValueRef
-ac_build_gep0(struct ac_llvm_context *ctx,
-             LLVMValueRef base_ptr,
-             LLVMValueRef index);
-LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
-                                 LLVMValueRef index);
-
-void
-ac_build_indexed_store(struct ac_llvm_context *ctx,
-                      LLVMValueRef base_ptr, LLVMValueRef index,
-                      LLVMValueRef value);
-
-LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
-                          LLVMValueRef index);
-LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
-                                    LLVMValueRef base_ptr, LLVMValueRef index);
-LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
-                                  LLVMValueRef base_ptr, LLVMValueRef index);
-LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
-                                  LLVMValueRef base_ptr, LLVMValueRef index);
-
-void
-ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
-                           LLVMValueRef rsrc,
-                           LLVMValueRef vdata,
-                           unsigned num_channels,
-                           LLVMValueRef voffset,
-                           LLVMValueRef soffset,
-                           unsigned inst_offset,
-                           unsigned cache_policy,
-                           bool swizzle_enable_hint);
-
-void
-ac_build_buffer_store_format(struct ac_llvm_context *ctx,
-                            LLVMValueRef rsrc,
-                            LLVMValueRef data,
-                            LLVMValueRef vindex,
-                            LLVMValueRef voffset,
-                            unsigned num_channels,
-                            unsigned cache_policy);
-
-LLVMValueRef
-ac_build_buffer_load(struct ac_llvm_context *ctx,
-                    LLVMValueRef rsrc,
-                    int num_channels,
-                    LLVMValueRef vindex,
-                    LLVMValueRef voffset,
-                    LLVMValueRef soffset,
-                    unsigned inst_offset,
-                    unsigned cache_policy,
-                    bool can_speculate,
-                    bool allow_smem);
-
-LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
-                                        LLVMValueRef rsrc,
-                                        LLVMValueRef vindex,
-                                        LLVMValueRef voffset,
-                                        unsigned num_channels,
-                                        unsigned cache_policy,
-                                        bool can_speculate);
-
-LLVMValueRef
-ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
-                           LLVMValueRef rsrc,
-                           LLVMValueRef voffset,
-                           LLVMValueRef soffset,
-                           LLVMValueRef immoffset,
-                           unsigned cache_policy);
-
-LLVMValueRef
-ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
-                          LLVMValueRef rsrc,
-                          LLVMValueRef voffset,
-                          LLVMValueRef soffset,
-                          LLVMValueRef immoffset,
-                          unsigned cache_policy);
-
-LLVMValueRef
-ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
-                            LLVMValueRef rsrc,
-                            LLVMValueRef vindex,
-                            LLVMValueRef voffset,
-                            LLVMValueRef soffset,
-                            LLVMValueRef immoffset,
-                            unsigned num_channels,
-                            unsigned dfmt,
-                            unsigned nfmt,
-                            unsigned cache_policy,
-                            bool can_speculate);
-
-LLVMValueRef
-ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
-                         LLVMValueRef rsrc,
-                         LLVMValueRef voffset,
-                         LLVMValueRef soffset,
-                         LLVMValueRef immoffset,
-                         unsigned num_channels,
-                         unsigned dfmt,
-                         unsigned nfmt,
-                         unsigned cache_policy,
-                         bool can_speculate);
-
-/* For ac_build_fetch_format.
- *
- * Note: FLOAT must be 0 (used for convenience of encoding in radeonsi).
- */
-enum {
-       AC_FETCH_FORMAT_FLOAT = 0,
-       AC_FETCH_FORMAT_FIXED,
-       AC_FETCH_FORMAT_UNORM,
-       AC_FETCH_FORMAT_SNORM,
-       AC_FETCH_FORMAT_USCALED,
-       AC_FETCH_FORMAT_SSCALED,
-       AC_FETCH_FORMAT_UINT,
-       AC_FETCH_FORMAT_SINT,
-};
-
-LLVMValueRef
-ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
-                              unsigned log_size,
-                              unsigned num_channels,
-                              unsigned format,
-                              bool reverse,
-                              bool known_aligned,
-                              LLVMValueRef rsrc,
-                              LLVMValueRef vindex,
-                              LLVMValueRef voffset,
-                              LLVMValueRef soffset,
-                              unsigned cache_policy,
-                              bool can_speculate);
-
-void
-ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
-                            LLVMValueRef rsrc,
-                            LLVMValueRef vdata,
-                            LLVMValueRef voffset,
-                            LLVMValueRef soffset,
-                            unsigned cache_policy);
-
-void
-ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
-                           LLVMValueRef rsrc,
-                           LLVMValueRef vdata,
-                           LLVMValueRef voffset,
-                           LLVMValueRef soffset,
-                           unsigned cache_policy);
-
-void
-ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
-                             LLVMValueRef rsrc,
-                             LLVMValueRef vdata,
-                             LLVMValueRef vindex,
-                             LLVMValueRef voffset,
-                             LLVMValueRef soffset,
-                             LLVMValueRef immoffset,
-                             unsigned num_channels,
-                             unsigned dfmt,
-                             unsigned nfmt,
-                             unsigned cache_policy);
-
-void
-ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
-                          LLVMValueRef rsrc,
-                          LLVMValueRef vdata,
-                          LLVMValueRef voffset,
-                          LLVMValueRef soffset,
-                          LLVMValueRef immoffset,
-                          unsigned num_channels,
-                          unsigned dfmt,
-                          unsigned nfmt,
-                          unsigned cache_policy);
-
-LLVMValueRef
-ac_get_thread_id(struct ac_llvm_context *ctx);
-
-#define AC_TID_MASK_TOP_LEFT 0xfffffffc
-#define AC_TID_MASK_TOP      0xfffffffd
-#define AC_TID_MASK_LEFT     0xfffffffe
-
-LLVMValueRef
-ac_build_ddxy(struct ac_llvm_context *ctx,
-             uint32_t mask,
-             int idx,
-             LLVMValueRef val);
-
-#define AC_SENDMSG_GS 2
-#define AC_SENDMSG_GS_DONE 3
-#define AC_SENDMSG_GS_ALLOC_REQ 9
-
-#define AC_SENDMSG_GS_OP_NOP      (0 << 4)
-#define AC_SENDMSG_GS_OP_CUT      (1 << 4)
-#define AC_SENDMSG_GS_OP_EMIT     (2 << 4)
-#define AC_SENDMSG_GS_OP_EMIT_CUT (3 << 4)
-
-void ac_build_sendmsg(struct ac_llvm_context *ctx,
-                     uint32_t msg,
-                     LLVMValueRef wave_id);
-
-LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx,
-                          LLVMValueRef arg,
-                          LLVMTypeRef dst_type);
-
-LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
-                         LLVMValueRef arg,
-                         LLVMTypeRef dst_type);
-LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
-                          LLVMValueRef b);
-LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
-                          LLVMValueRef b);
-LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
-                          LLVMValueRef b);
-LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
-                          LLVMValueRef b);
-LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
-LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
-LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
-
-struct ac_export_args {
-       LLVMValueRef out[4];
-        unsigned target;
-        unsigned enabled_channels;
-        bool compr;
-        bool done;
-        bool valid_mask;
-};
-
-void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a);
-
-void ac_build_export_null(struct ac_llvm_context *ctx);
-
-enum ac_image_opcode {
-       ac_image_sample,
-       ac_image_gather4,
-       ac_image_load,
-       ac_image_load_mip,
-       ac_image_store,
-       ac_image_store_mip,
-       ac_image_get_lod,
-       ac_image_get_resinfo,
-       ac_image_atomic,
-       ac_image_atomic_cmpswap,
-};
-
-enum ac_atomic_op {
-       ac_atomic_swap,
-       ac_atomic_add,
-       ac_atomic_sub,
-       ac_atomic_smin,
-       ac_atomic_umin,
-       ac_atomic_smax,
-       ac_atomic_umax,
-       ac_atomic_and,
-       ac_atomic_or,
-       ac_atomic_xor,
-       ac_atomic_inc_wrap,
-       ac_atomic_dec_wrap,
-};
-
-/* These cache policy bits match the definitions used by the LLVM intrinsics. */
-enum ac_image_cache_policy {
-       ac_glc = 1 << 0, /* per-CU cache control */
-       ac_slc = 1 << 1, /* global L2 cache control */
-       ac_dlc = 1 << 2, /* per-shader-array cache control */
-};
-
-struct ac_image_args {
-       enum ac_image_opcode opcode : 4;
-       enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
-       enum ac_image_dim dim : 3;
-       unsigned dmask : 4;
-       unsigned cache_policy : 3;
-       bool unorm : 1;
-       bool level_zero : 1;
-       unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
-
-       LLVMValueRef resource;
-       LLVMValueRef sampler;
-       LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
-       LLVMValueRef offset;
-       LLVMValueRef bias;
-       LLVMValueRef compare;
-       LLVMValueRef derivs[6];
-       LLVMValueRef coords[4];
-       LLVMValueRef lod; // also used by ac_image_get_resinfo
-};
-
-LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
-                                  struct ac_image_args *a);
-LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
-                                            LLVMValueRef rsrc);
-LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
-                                   LLVMValueRef args[2]);
-LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
-                                    LLVMValueRef args[2]);
-LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
-                                    LLVMValueRef args[2]);
-LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
-                                LLVMValueRef args[2], unsigned bits, bool hi);
-LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
-                                LLVMValueRef args[2], unsigned bits, bool hi);
-LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
-void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
-LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
-                         LLVMValueRef offset, LLVMValueRef width,
-                         bool is_signed);
-LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
-                          LLVMValueRef s1, LLVMValueRef s2);
-LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
-                          LLVMValueRef s1, LLVMValueRef s2);
-
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
-
-LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                          unsigned bitsize);
-
-LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                           LLVMValueRef src1, LLVMValueRef src2,
-                           unsigned bitsize);
-
-LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                           unsigned bitsize);
-
-LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                           unsigned bitsize);
-
-LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
-
-LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
-                                      LLVMValueRef src0);
-
-void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
-                           LLVMValueRef main_fn,
-                           uint8_t *vs_output_param_offset,
-                           uint32_t num_outputs,
-                           uint8_t *num_param_exports);
-void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
-
-void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
-LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
-                        LLVMValueRef dw_addr);
-void ac_lds_store(struct ac_llvm_context *ctx,
-                 LLVMValueRef dw_addr, LLVMValueRef value);
-
-LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
-                        LLVMTypeRef dst_type,
-                        LLVMValueRef src0);
-
-LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
-LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
-
-void ac_build_bgnloop(struct ac_llvm_context *ctx, int lable_id);
-void ac_build_break(struct ac_llvm_context *ctx);
-void ac_build_continue(struct ac_llvm_context *ctx);
-void ac_build_else(struct ac_llvm_context *ctx, int lable_id);
-void ac_build_endif(struct ac_llvm_context *ctx, int lable_id);
-void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id);
-void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id);
-void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
-                int lable_id);
-void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
-                 int lable_id);
-
-LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type,
-                            const char *name);
-LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
-                                  const char *name);
-
-LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
-                        LLVMTypeRef type);
-
-LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
-                           unsigned count);
-
-LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
-                            unsigned rshift, unsigned bitwidth);
-
-void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
-                             LLVMValueRef *addr, bool is_array_tex);
-
-LLVMValueRef
-ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
-
-LLVMValueRef
-ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
-
-LLVMValueRef
-ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane);
-
-LLVMValueRef
-ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
-
-LLVMValueRef
-ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
-
-LLVMValueRef
-ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
-
-LLVMValueRef
-ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
-
-/**
- * Common arguments for a scan/reduce operation that accumulates per-wave
- * values across an entire workgroup, while respecting the order of waves.
- */
-struct ac_wg_scan {
-       bool enable_reduce;
-       bool enable_exclusive;
-       bool enable_inclusive;
-       nir_op op;
-       LLVMValueRef src; /* clobbered! */
-       LLVMValueRef result_reduce;
-       LLVMValueRef result_exclusive;
-       LLVMValueRef result_inclusive;
-       LLVMValueRef extra;
-       LLVMValueRef waveidx;
-       LLVMValueRef numwaves; /* only needed for "reduce" operations */
-
-       /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
-       LLVMValueRef scratch;
-       unsigned maxwaves;
-};
-
-void
-ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-
-void
-ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-
-LLVMValueRef
-ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
-               unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
-
-LLVMValueRef
-ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
-
-LLVMValueRef
-ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                  unsigned bitsize);
-
-LLVMValueRef
-ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                   unsigned bitsize);
-
-LLVMValueRef
-ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
-
-LLVMValueRef
-ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
-
-LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
-                          LLVMValueRef *args, unsigned num_args);
-
-LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
-                                LLVMValueRef ptr, LLVMValueRef val,
-                                const char *sync_scope);
-
-LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
-                                     LLVMValueRef cmp, LLVMValueRef val,
-                                     const char *sync_scope);
-
-void
-ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
-               LLVMValueRef stencil, LLVMValueRef samplemask,
-               struct ac_export_args *args);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/amd/common/ac_llvm_cull.c b/src/amd/common/ac_llvm_cull.c
deleted file mode 100644 (file)
index 1c2da3e..0000000
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-
-#include "ac_llvm_cull.h"
-#include <llvm-c/Core.h>
-
-struct ac_position_w_info {
-       /* If a primitive intersects the W=0 plane, it causes a reflection
-        * of the determinant used for face culling. Every vertex behind
-        * the W=0 plane negates the determinant, so having 2 vertices behind
-        * the plane has no effect. This is i1 true if the determinant should be
-        * negated.
-        */
-       LLVMValueRef w_reflection;
-
-       /* If we simplify the "-w <= p <= w" view culling equation, we get
-        * "-w <= w", which can't be satisfied when w is negative.
-        * In perspective projection, a negative W means that the primitive
-        * is behind the viewer, but the equation is independent of the type
-        * of projection.
-        *
-        * w_accepted is false when all W are negative and therefore
-        * the primitive is invisible.
-        */
-       LLVMValueRef w_accepted;
-
-       LLVMValueRef all_w_positive;
-       LLVMValueRef any_w_negative;
-};
-
-static void ac_analyze_position_w(struct ac_llvm_context *ctx,
-                                 LLVMValueRef pos[3][4],
-                                 struct ac_position_w_info *w)
-{
-       LLVMBuilderRef builder = ctx->builder;
-       LLVMValueRef all_w_negative = ctx->i1true;
-
-       w->w_reflection = ctx->i1false;
-       w->any_w_negative = ctx->i1false;
-
-       for (unsigned i = 0; i < 3; i++) {
-               LLVMValueRef neg_w;
-
-               neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
-               /* If neg_w is true, negate w_reflection. */
-               w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, "");
-               w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, "");
-               all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, "");
-       }
-       w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, "");
-       w->w_accepted = LLVMBuildNot(builder, all_w_negative, "");
-}
-
-/* Perform front/back face culling and return true if the primitive is accepted. */
-static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx,
-                                LLVMValueRef pos[3][4],
-                                struct ac_position_w_info *w,
-                                bool cull_front,
-                                bool cull_back,
-                                bool cull_zero_area)
-{
-       LLVMBuilderRef builder = ctx->builder;
-
-       if (cull_front && cull_back)
-               return ctx->i1false;
-
-       if (!cull_front && !cull_back && !cull_zero_area)
-               return ctx->i1true;
-
-       /* Front/back face culling. Also if the determinant == 0, the triangle
-        * area is 0.
-        */
-       LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");
-       LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");
-       LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");
-       LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");
-       LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");
-       LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");
-       LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");
-
-       /* Negative W negates the determinant. */
-       det = LLVMBuildSelect(builder, w->w_reflection,
-                             LLVMBuildFNeg(builder, det, ""),
-                             det, "");
-
-       LLVMValueRef accepted = NULL;
-       if (cull_front) {
-               LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
-               accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
-       } else if (cull_back) {
-               LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
-               accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
-       } else if (cull_zero_area) {
-               accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");
-       }
-       return accepted;
-}
-
-/* Perform view culling and small primitive elimination and return true
- * if the primitive is accepted and initially_accepted == true. */
-static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
-                             LLVMValueRef pos[3][4],
-                             LLVMValueRef initially_accepted,
-                             struct ac_position_w_info *w,
-                             LLVMValueRef vp_scale[2],
-                             LLVMValueRef vp_translate[2],
-                             LLVMValueRef small_prim_precision,
-                             bool cull_view_xy,
-                             bool cull_view_near_z,
-                             bool cull_view_far_z,
-                             bool cull_small_prims,
-                             bool use_halfz_clip_space)
-{
-       LLVMBuilderRef builder = ctx->builder;
-
-       if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
-               return ctx->i1true;
-
-       /* Skip the culling if the primitive has already been rejected or
-        * if any W is negative. The bounding box culling doesn't work when
-        * W is negative.
-        */
-       LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted,
-                                        w->all_w_positive, "");
-       LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
-       LLVMBuildStore(builder, initially_accepted, accepted_var);
-
-       ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
-       {
-               LLVMValueRef bbox_min[3], bbox_max[3];
-               LLVMValueRef accepted = initially_accepted;
-
-               /* Compute the primitive bounding box for easy culling. */
-               for (unsigned chan = 0; chan < 3; chan++) {
-                       bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
-                       bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
-
-                       bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
-                       bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
-               }
-
-               /* View culling. */
-               if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
-                       for (unsigned chan = 0; chan < 3; chan++) {
-                               LLVMValueRef visible;
-
-                               if ((cull_view_xy && chan <= 1) ||
-                                   (cull_view_near_z && chan == 2)) {
-                                       float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
-                                       visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
-                                                               LLVMConstReal(ctx->f32, t), "");
-                                       accepted = LLVMBuildAnd(builder, accepted, visible, "");
-                               }
-
-                               if ((cull_view_xy && chan <= 1) ||
-                                   (cull_view_far_z && chan == 2)) {
-                                       visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan],
-                                                               ctx->f32_1, "");
-                                       accepted = LLVMBuildAnd(builder, accepted, visible, "");
-                               }
-                       }
-               }
-
-               /* Small primitive elimination. */
-               if (cull_small_prims) {
-                       /* Assuming a sample position at (0.5, 0.5), if we round
-                        * the bounding box min/max extents and the results of
-                        * the rounding are equal in either the X or Y direction,
-                        * the bounding box does not intersect the sample.
-                        *
-                        * See these GDC slides for pictures:
-                        * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
-                        */
-                       LLVMValueRef min, max, not_equal[2], visible;
-
-                       for (unsigned chan = 0; chan < 2; chan++) {
-                               /* Convert the position to screen-space coordinates. */
-                               min = ac_build_fmad(ctx, bbox_min[chan],
-                                                   vp_scale[chan], vp_translate[chan]);
-                               max = ac_build_fmad(ctx, bbox_max[chan],
-                                                   vp_scale[chan], vp_translate[chan]);
-                               /* Scale the bounding box according to the precision of
-                                * the rasterizer and the number of MSAA samples. */
-                               min = LLVMBuildFSub(builder, min, small_prim_precision, "");
-                               max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
-
-                               /* Determine if the bbox intersects the sample point.
-                                * It also works for MSAA, but vp_scale, vp_translate,
-                                * and small_prim_precision are computed differently.
-                                */
-                               min = ac_build_round(ctx, min);
-                               max = ac_build_round(ctx, max);
-                               not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
-                       }
-                       visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
-                       accepted = LLVMBuildAnd(builder, accepted, visible, "");
-               }
-
-               LLVMBuildStore(builder, accepted, accepted_var);
-       }
-       ac_build_endif(ctx, 10000000);
-
-       return LLVMBuildLoad(builder, accepted_var, "");
-}
-
-/**
- * Return i1 true if the primitive is accepted (not culled).
- *
- * \param pos                   Vertex positions 3x vec4
- * \param initially_accepted    AND'ed with the result. Some computations can be
- *                              skipped if this is false.
- * \param vp_scale              Viewport scale XY.
- *                              For MSAA, multiply them by the number of samples.
- * \param vp_translate          Viewport translation XY.
- *                              For MSAA, multiply them by the number of samples.
- * \param small_prim_precision  Precision of small primitive culling. This should
- *                              be the same as or greater than the precision of
- *                              the rasterizer. Set to num_samples / 2^subpixel_bits.
- *                              subpixel_bits are defined by the quantization mode.
- * \param options               See ac_cull_options.
- */
-LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
-                             LLVMValueRef pos[3][4],
-                             LLVMValueRef initially_accepted,
-                             LLVMValueRef vp_scale[2],
-                             LLVMValueRef vp_translate[2],
-                             LLVMValueRef small_prim_precision,
-                             struct ac_cull_options *options)
-{
-       struct ac_position_w_info w;
-       ac_analyze_position_w(ctx, pos, &w);
-
-       /* W culling. */
-       LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
-       accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");
-
-       /* Face culling. */
-       accepted = LLVMBuildAnd(ctx->builder, accepted,
-                               ac_cull_face(ctx, pos, &w,
-                                            options->cull_front,
-                                            options->cull_back,
-                                            options->cull_zero_area), "");
-
-       /* View culling and small primitive elimination. */
-       accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate,
-                            small_prim_precision,
-                            options->cull_view_xy,
-                            options->cull_view_near_z,
-                            options->cull_view_far_z,
-                            options->cull_small_prims,
-                            options->use_halfz_clip_space);
-       return accepted;
-}
diff --git a/src/amd/common/ac_llvm_cull.h b/src/amd/common/ac_llvm_cull.h
deleted file mode 100644 (file)
index 0aa6c90..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-
-#ifndef AC_LLVM_CULL_H
-#define AC_LLVM_CULL_H
-
-#include "ac_llvm_build.h"
-
-struct ac_cull_options {
-       /* In general, I recommend setting all to true except view Z culling,
-        * which isn't so effective because W culling is cheaper and partially
-        * replaces near Z culling, and you don't need to set Position.z
-        * if Z culling is disabled.
-        *
-        * If something doesn't work, turn some of these off to find out what.
-        */
-       bool cull_front;
-       bool cull_back;
-       bool cull_view_xy;
-       bool cull_view_near_z;
-       bool cull_view_far_z;
-       bool cull_small_prims;
-       bool cull_zero_area;
-       bool cull_w; /* cull primitives with all W < 0 */
-
-       bool use_halfz_clip_space;
-};
-
-LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
-                             LLVMValueRef pos[3][4],
-                             LLVMValueRef initially_accepted,
-                             LLVMValueRef vp_scale[2],
-                             LLVMValueRef vp_translate[2],
-                             LLVMValueRef small_prim_precision,
-                             struct ac_cull_options *options);
-
-#endif
diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp
deleted file mode 100644 (file)
index b7a72ee..0000000
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-
-#include <cstring>
-
-#include "ac_binary.h"
-#include "ac_llvm_util.h"
-#include "ac_llvm_build.h"
-
-#include "util/macros.h"
-
-#include <llvm-c/Core.h>
-#include <llvm/Target/TargetMachine.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/Analysis/TargetLibraryInfo.h>
-#include <llvm/Transforms/IPO.h>
-
-#include <llvm/IR/LegacyPassManager.h>
-
-void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
-{
-   llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
-   A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
-}
-
-bool ac_is_sgpr_param(LLVMValueRef arg)
-{
-       llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
-       llvm::AttributeList AS = A->getParent()->getAttributes();
-       unsigned ArgNo = A->getArgNo();
-       return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
-}
-
-LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
-{
-       return LLVMGetCalledValue(call);
-}
-
-bool ac_llvm_is_function(LLVMValueRef v)
-{
-       return LLVMGetValueKind(v) == LLVMFunctionValueKind;
-}
-
-LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
-{
-   llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
-   LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
-
-   llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
-   llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
-   return module;
-}
-
-LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
-                                enum ac_float_mode float_mode)
-{
-       LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
-
-       llvm::FastMathFlags flags;
-
-       switch (float_mode) {
-       case AC_FLOAT_MODE_DEFAULT:
-               break;
-       case AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH:
-               flags.setNoSignedZeros();
-               llvm::unwrap(builder)->setFastMathFlags(flags);
-               break;
-       case AC_FLOAT_MODE_UNSAFE_FP_MATH:
-               flags.setFast();
-               llvm::unwrap(builder)->setFastMathFlags(flags);
-               break;
-       }
-
-       return builder;
-}
-
-LLVMTargetLibraryInfoRef
-ac_create_target_library_info(const char *triple)
-{
-       return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
-}
-
-void
-ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
-{
-       delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
-}
-
-/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
- * better compatibility with C code. */
-struct raw_memory_ostream : public llvm::raw_pwrite_stream {
-       char *buffer;
-       size_t written;
-       size_t bufsize;
-
-       raw_memory_ostream()
-       {
-               buffer = NULL;
-               written = 0;
-               bufsize = 0;
-               SetUnbuffered();
-       }
-
-       ~raw_memory_ostream()
-       {
-               free(buffer);
-       }
-
-       void clear()
-       {
-               written = 0;
-       }
-
-       void take(char *&out_buffer, size_t &out_size)
-       {
-               out_buffer = buffer;
-               out_size = written;
-               buffer = NULL;
-               written = 0;
-               bufsize = 0;
-       }
-
-       void flush() = delete;
-
-       void write_impl(const char *ptr, size_t size) override
-       {
-               if (unlikely(written + size < written))
-                       abort();
-               if (written + size > bufsize) {
-                       bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
-                       buffer = (char *)realloc(buffer, bufsize);
-                       if (!buffer) {
-                               fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
-                               abort();
-                       }
-               }
-               memcpy(buffer + written, ptr, size);
-               written += size;
-       }
-
-       void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
-       {
-               assert(offset == (size_t)offset &&
-                      offset + size >= offset && offset + size <= written);
-               memcpy(buffer + offset, ptr, size);
-       }
-
-       uint64_t current_pos() const override
-       {
-               return written;
-       }
-};
-
-/* The LLVM compiler is represented as a pass manager containing passes for
- * optimizations, instruction selection, and code generation.
- */
-struct ac_compiler_passes {
-       raw_memory_ostream ostream; /* ELF shader binary stream */
-       llvm::legacy::PassManager passmgr; /* list of passes */
-};
-
-struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
-{
-       struct ac_compiler_passes *p = new ac_compiler_passes();
-       if (!p)
-               return NULL;
-
-       llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
-
-       if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
-                                   nullptr,
-                                   llvm::TargetMachine::CGFT_ObjectFile)) {
-               fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
-               delete p;
-               return NULL;
-       }
-       return p;
-}
-
-void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
-{
-       delete p;
-}
-
-/* This returns false on failure. */
-bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
-                             char **pelf_buffer, size_t *pelf_size)
-{
-       p->passmgr.run(*llvm::unwrap(module));
-       p->ostream.take(*pelf_buffer, *pelf_size);
-       return true;
-}
-
-void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
-{
-       llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
-}
-
-void ac_enable_global_isel(LLVMTargetMachineRef tm)
-{
-  reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
-}
-
-LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
-                                LLVMValueRef ptr, LLVMValueRef val,
-                                const char *sync_scope) {
-       llvm::AtomicRMWInst::BinOp binop;
-       switch (op) {
-       case LLVMAtomicRMWBinOpXchg:
-               binop = llvm::AtomicRMWInst::Xchg;
-               break;
-       case LLVMAtomicRMWBinOpAdd:
-               binop = llvm::AtomicRMWInst::Add;
-               break;
-       case LLVMAtomicRMWBinOpSub:
-               binop = llvm::AtomicRMWInst::Sub;
-               break;
-       case LLVMAtomicRMWBinOpAnd:
-               binop = llvm::AtomicRMWInst::And;
-               break;
-       case LLVMAtomicRMWBinOpNand:
-               binop = llvm::AtomicRMWInst::Nand;
-               break;
-       case LLVMAtomicRMWBinOpOr:
-               binop = llvm::AtomicRMWInst::Or;
-               break;
-       case LLVMAtomicRMWBinOpXor:
-               binop = llvm::AtomicRMWInst::Xor;
-               break;
-       case LLVMAtomicRMWBinOpMax:
-               binop = llvm::AtomicRMWInst::Max;
-               break;
-       case LLVMAtomicRMWBinOpMin:
-               binop = llvm::AtomicRMWInst::Min;
-               break;
-       case LLVMAtomicRMWBinOpUMax:
-               binop = llvm::AtomicRMWInst::UMax;
-               break;
-       case LLVMAtomicRMWBinOpUMin:
-               binop = llvm::AtomicRMWInst::UMin;
-               break;
-       default:
-               unreachable(!"invalid LLVMAtomicRMWBinOp");
-          break;
-       }
-       unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
-       return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
-               binop, llvm::unwrap(ptr), llvm::unwrap(val),
-               llvm::AtomicOrdering::SequentiallyConsistent, SSID));
-}
-
-LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
-                                     LLVMValueRef cmp, LLVMValueRef val,
-                                     const char *sync_scope) {
-       unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
-       return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
-                         llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
-                         llvm::AtomicOrdering::SequentiallyConsistent,
-                         llvm::AtomicOrdering::SequentiallyConsistent, SSID));
-}
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
deleted file mode 100644 (file)
index ddc8fee..0000000
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
-#include "ac_llvm_util.h"
-#include "ac_llvm_build.h"
-#include "util/bitscan.h"
-#include <llvm-c/Core.h>
-#include <llvm-c/Support.h>
-#include <llvm-c/Transforms/IPO.h>
-#include <llvm-c/Transforms/Scalar.h>
-#include <llvm-c/Transforms/Utils.h>
-#include "c11/threads.h"
-#include "gallivm/lp_bld_misc.h"
-#include "util/u_math.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-
-static void ac_init_llvm_target()
-{
-       LLVMInitializeAMDGPUTargetInfo();
-       LLVMInitializeAMDGPUTarget();
-       LLVMInitializeAMDGPUTargetMC();
-       LLVMInitializeAMDGPUAsmPrinter();
-
-       /* For inline assembly. */
-       LLVMInitializeAMDGPUAsmParser();
-
-       /* For ACO disassembly. */
-       LLVMInitializeAMDGPUDisassembler();
-
-       /* Workaround for bug in llvm 4.0 that causes image intrinsics
-        * to disappear.
-        * https://reviews.llvm.org/D26348
-        *
-        * "mesa" is the prefix for error messages.
-        *
-        * -global-isel-abort=2 is a no-op unless global isel has been enabled.
-        * This option tells the backend to fall-back to SelectionDAG and print
-        * a diagnostic message if global isel fails.
-        */
-       const char *argv[] = {
-               "mesa",
-               "-simplifycfg-sink-common=false",
-               "-global-isel-abort=2",
-#if LLVM_VERSION_MAJOR >= 10
-               /* Atomic optimizations require LLVM 10.0 for gfx10 support. */
-               "-amdgpu-atomic-optimizations=true",
-#endif
-       };
-       LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL);
-}
-
-static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
-
-void ac_init_llvm_once(void)
-{
-       call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
-}
-
-static LLVMTargetRef ac_get_llvm_target(const char *triple)
-{
-       LLVMTargetRef target = NULL;
-       char *err_message = NULL;
-
-       if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
-               fprintf(stderr, "Cannot find target for triple %s ", triple);
-               if (err_message) {
-                       fprintf(stderr, "%s\n", err_message);
-               }
-               LLVMDisposeMessage(err_message);
-               return NULL;
-       }
-       return target;
-}
-
-const char *ac_get_llvm_processor_name(enum radeon_family family)
-{
-       switch (family) {
-       case CHIP_TAHITI:
-               return "tahiti";
-       case CHIP_PITCAIRN:
-               return "pitcairn";
-       case CHIP_VERDE:
-               return "verde";
-       case CHIP_OLAND:
-               return "oland";
-       case CHIP_HAINAN:
-               return "hainan";
-       case CHIP_BONAIRE:
-               return "bonaire";
-       case CHIP_KABINI:
-               return "kabini";
-       case CHIP_KAVERI:
-               return "kaveri";
-       case CHIP_HAWAII:
-               return "hawaii";
-       case CHIP_TONGA:
-               return "tonga";
-       case CHIP_ICELAND:
-               return "iceland";
-       case CHIP_CARRIZO:
-               return "carrizo";
-       case CHIP_FIJI:
-               return "fiji";
-       case CHIP_STONEY:
-               return "stoney";
-       case CHIP_POLARIS10:
-               return "polaris10";
-       case CHIP_POLARIS11:
-       case CHIP_POLARIS12:
-       case CHIP_VEGAM:
-               return "polaris11";
-       case CHIP_VEGA10:
-               return "gfx900";
-       case CHIP_RAVEN:
-               return "gfx902";
-       case CHIP_VEGA12:
-               return "gfx904";
-       case CHIP_VEGA20:
-               return "gfx906";
-       case CHIP_RAVEN2:
-       case CHIP_RENOIR:
-               return "gfx909";
-       case CHIP_ARCTURUS:
-               return "gfx908";
-       case CHIP_NAVI10:
-               return "gfx1010";
-       case CHIP_NAVI12:
-               return "gfx1011";
-       case CHIP_NAVI14:
-               return "gfx1012";
-       default:
-               return "";
-       }
-}
-
-static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
-                                                    enum ac_target_machine_options tm_options,
-                                                    LLVMCodeGenOptLevel level,
-                                                    const char **out_triple)
-{
-       assert(family >= CHIP_TAHITI);
-       char features[256];
-       const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
-       LLVMTargetRef target = ac_get_llvm_target(triple);
-
-       snprintf(features, sizeof(features),
-                "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s",
-                family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) ?
-                        ",+wavefrontsize64,-wavefrontsize32" : "",
-                tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
-                tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
-                tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
-                tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "",
-                tm_options & AC_TM_NO_LOAD_STORE_OPT ? ",-load-store-opt" : "");
-
-       LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
-                                    target,
-                                    triple,
-                                    ac_get_llvm_processor_name(family),
-                                    features,
-                                    level,
-                                    LLVMRelocDefault,
-                                    LLVMCodeModelDefault);
-
-       if (out_triple)
-               *out_triple = triple;
-       if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
-               ac_enable_global_isel(tm);
-       return tm;
-}
-
-static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
-                                           bool check_ir)
-{
-       LLVMPassManagerRef passmgr = LLVMCreatePassManager();
-       if (!passmgr)
-               return NULL;
-
-       if (target_library_info)
-               LLVMAddTargetLibraryInfo(target_library_info,
-                                        passmgr);
-
-       if (check_ir)
-               LLVMAddVerifierPass(passmgr);
-       LLVMAddAlwaysInlinerPass(passmgr);
-       /* Normally, the pass manager runs all passes on one function before
-        * moving onto another. Adding a barrier no-op pass forces the pass
-        * manager to run the inliner on all functions first, which makes sure
-        * that the following passes are only run on the remaining non-inline
-        * function, so it removes useless work done on dead inline functions.
-        */
-       ac_llvm_add_barrier_noop_pass(passmgr);
-       /* This pass should eliminate all the load and store instructions. */
-       LLVMAddPromoteMemoryToRegisterPass(passmgr);
-       LLVMAddScalarReplAggregatesPass(passmgr);
-       LLVMAddLICMPass(passmgr);
-       LLVMAddAggressiveDCEPass(passmgr);
-       LLVMAddCFGSimplificationPass(passmgr);
-       /* This is recommended by the instruction combining pass. */
-       LLVMAddEarlyCSEMemSSAPass(passmgr);
-       LLVMAddInstructionCombiningPass(passmgr);
-       return passmgr;
-}
-
-static const char *attr_to_str(enum ac_func_attr attr)
-{
-   switch (attr) {
-   case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
-   case AC_FUNC_ATTR_INREG: return "inreg";
-   case AC_FUNC_ATTR_NOALIAS: return "noalias";
-   case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
-   case AC_FUNC_ATTR_READNONE: return "readnone";
-   case AC_FUNC_ATTR_READONLY: return "readonly";
-   case AC_FUNC_ATTR_WRITEONLY: return "writeonly";
-   case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly";
-   case AC_FUNC_ATTR_CONVERGENT: return "convergent";
-   default:
-          fprintf(stderr, "Unhandled function attribute: %x\n", attr);
-          return 0;
-   }
-}
-
-void
-ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
-                     int attr_idx, enum ac_func_attr attr)
-{
-   const char *attr_name = attr_to_str(attr);
-   unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
-                                                      strlen(attr_name));
-   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
-
-   if (LLVMIsAFunction(function))
-      LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
-   else
-      LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
-}
-
-void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
-                           unsigned attrib_mask)
-{
-       attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
-       attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
-
-       while (attrib_mask) {
-               enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
-               ac_add_function_attr(ctx, function, -1, attr);
-       }
-}
-
-void
-ac_dump_module(LLVMModuleRef module)
-{
-       char *str = LLVMPrintModuleToString(module);
-       fprintf(stderr, "%s", str);
-       LLVMDisposeMessage(str);
-}
-
-void
-ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
-                                    const char *name, unsigned value)
-{
-       char str[16];
-
-       snprintf(str, sizeof(str), "0x%x", value);
-       LLVMAddTargetDependentFunctionAttr(F, name, str);
-}
-
-void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
-{
-       if (!size)
-               return;
-
-       char str[32];
-       snprintf(str, sizeof(str), "%u,%u", size, size);
-       LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
-}
-
-unsigned
-ac_count_scratch_private_memory(LLVMValueRef function)
-{
-       unsigned private_mem_vgprs = 0;
-
-       /* Process all LLVM instructions. */
-       LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
-       while (bb) {
-               LLVMValueRef next = LLVMGetFirstInstruction(bb);
-
-               while (next) {
-                       LLVMValueRef inst = next;
-                       next = LLVMGetNextInstruction(next);
-
-                       if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
-                               continue;
-
-                       LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
-                       /* No idea why LLVM aligns allocas to 4 elements. */
-                       unsigned alignment = LLVMGetAlignment(inst);
-                       unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
-                       private_mem_vgprs += dw_size;
-               }
-               bb = LLVMGetNextBasicBlock(bb);
-       }
-
-       return private_mem_vgprs;
-}
-
-bool
-ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
-                     enum radeon_family family,
-                     enum ac_target_machine_options tm_options)
-{
-       const char *triple;
-       memset(compiler, 0, sizeof(*compiler));
-
-       compiler->tm = ac_create_target_machine(family, tm_options,
-                                               LLVMCodeGenLevelDefault,
-                                               &triple);
-       if (!compiler->tm)
-               return false;
-
-       if (tm_options & AC_TM_CREATE_LOW_OPT) {
-               compiler->low_opt_tm =
-                       ac_create_target_machine(family, tm_options,
-                                                LLVMCodeGenLevelLess, NULL);
-               if (!compiler->low_opt_tm)
-                       goto fail;
-       }
-
-       if (family >= CHIP_NAVI10) {
-               assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
-               compiler->tm_wave32 = ac_create_target_machine(family,
-                                                              tm_options | AC_TM_WAVE32,
-                                                              LLVMCodeGenLevelDefault,
-                                                              NULL);
-               if (!compiler->tm_wave32)
-                       goto fail;
-       }
-
-       compiler->target_library_info =
-               ac_create_target_library_info(triple);
-       if (!compiler->target_library_info)
-               goto fail;
-
-       compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
-                                             tm_options & AC_TM_CHECK_IR);
-       if (!compiler->passmgr)
-               goto fail;
-
-       return true;
-fail:
-       ac_destroy_llvm_compiler(compiler);
-       return false;
-}
-
-void
-ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
-{
-       ac_destroy_llvm_passes(compiler->passes);
-       ac_destroy_llvm_passes(compiler->passes_wave32);
-       ac_destroy_llvm_passes(compiler->low_opt_passes);
-
-       if (compiler->passmgr)
-               LLVMDisposePassManager(compiler->passmgr);
-       if (compiler->target_library_info)
-               ac_dispose_target_library_info(compiler->target_library_info);
-       if (compiler->low_opt_tm)
-               LLVMDisposeTargetMachine(compiler->low_opt_tm);
-       if (compiler->tm)
-               LLVMDisposeTargetMachine(compiler->tm);
-       if (compiler->tm_wave32)
-               LLVMDisposeTargetMachine(compiler->tm_wave32);
-}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
deleted file mode 100644 (file)
index 60c9a17..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright 2016 Bas Nieuwenhuizen
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-
-#ifndef AC_LLVM_UTIL_H
-#define AC_LLVM_UTIL_H
-
-#include <stdbool.h>
-#include <llvm-c/TargetMachine.h>
-#include <llvm/Config/llvm-config.h>
-
-#include "amd_family.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct ac_compiler_passes;
-
-enum ac_func_attr {
-       AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
-       AC_FUNC_ATTR_INREG        = (1 << 2),
-       AC_FUNC_ATTR_NOALIAS      = (1 << 3),
-       AC_FUNC_ATTR_NOUNWIND     = (1 << 4),
-       AC_FUNC_ATTR_READNONE     = (1 << 5),
-       AC_FUNC_ATTR_READONLY     = (1 << 6),
-       AC_FUNC_ATTR_WRITEONLY    = (1 << 7),
-       AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8),
-       AC_FUNC_ATTR_CONVERGENT = (1 << 9),
-
-       /* Legacy intrinsic that needs attributes on function declarations
-        * and they must match the internal LLVM definition exactly, otherwise
-        * intrinsic selection fails.
-        */
-       AC_FUNC_ATTR_LEGACY       = (1u << 31),
-};
-
-enum ac_target_machine_options {
-       AC_TM_SUPPORTS_SPILL = (1 << 0),
-       AC_TM_SISCHED = (1 << 1),
-       AC_TM_FORCE_ENABLE_XNACK = (1 << 2),
-       AC_TM_FORCE_DISABLE_XNACK = (1 << 3),
-       AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
-       AC_TM_CHECK_IR = (1 << 5),
-       AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
-       AC_TM_CREATE_LOW_OPT = (1 << 7),
-       AC_TM_NO_LOAD_STORE_OPT = (1 << 8),
-       AC_TM_WAVE32 = (1 << 9),
-};
-
-enum ac_float_mode {
-       AC_FLOAT_MODE_DEFAULT,
-       AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
-       AC_FLOAT_MODE_UNSAFE_FP_MATH,
-};
-
-/* Per-thread persistent LLVM objects. */
-struct ac_llvm_compiler {
-       LLVMTargetLibraryInfoRef        target_library_info;
-       LLVMPassManagerRef              passmgr;
-
-       /* Default compiler. */
-       LLVMTargetMachineRef            tm;
-       struct ac_compiler_passes       *passes;
-
-       /* Wave32 compiler for GFX10. */
-       LLVMTargetMachineRef            tm_wave32;
-       struct ac_compiler_passes       *passes_wave32;
-
-       /* Optional compiler for faster compilation with fewer optimizations.
-        * LLVM modules can be created with "tm" too. There is no difference.
-        */
-       LLVMTargetMachineRef            low_opt_tm; /* uses -O1 instead of -O2 */
-       struct ac_compiler_passes       *low_opt_passes;
-};
-
-const char *ac_get_llvm_processor_name(enum radeon_family family);
-void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
-bool ac_is_sgpr_param(LLVMValueRef param);
-void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
-                          int attr_idx, enum ac_func_attr attr);
-void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
-                           unsigned attrib_mask);
-void ac_dump_module(LLVMModuleRef module);
-
-LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
-bool ac_llvm_is_function(LLVMValueRef v);
-LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
-
-LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
-                                enum ac_float_mode float_mode);
-
-void
-ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
-                                    const char *name, unsigned value);
-void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
-
-static inline unsigned
-ac_get_load_intr_attribs(bool can_speculate)
-{
-       /* READNONE means writes can't affect it, while READONLY means that
-        * writes can affect it. */
-       return can_speculate ? AC_FUNC_ATTR_READNONE :
-                              AC_FUNC_ATTR_READONLY;
-}
-
-unsigned
-ac_count_scratch_private_memory(LLVMValueRef function);
-
-LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple);
-void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
-void ac_init_llvm_once(void);
-
-
-bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
-                          enum radeon_family family,
-                          enum ac_target_machine_options tm_options);
-void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
-
-struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm);
-void ac_destroy_llvm_passes(struct ac_compiler_passes *p);
-bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
-                             char **pelf_buffer, size_t *pelf_size);
-void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
-void ac_enable_global_isel(LLVMTargetMachineRef tm);
-
-static inline bool
-ac_has_vec3_support(enum chip_class chip, bool use_format)
-{
-       if (chip == GFX6 && !use_format) {
-               /* GFX6 only supports vec3 with load/store format. */
-               return false;
-       }
-
-       return LLVM_VERSION_MAJOR >= 9;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* AC_LLVM_UTIL_H */
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
deleted file mode 100644 (file)
index bb99c73..0000000
+++ /dev/null
@@ -1,4944 +0,0 @@
-/*
- * Copyright © 2016 Bas Nieuwenhuizen
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <llvm/Config/llvm-config.h>
-
-#include "ac_nir_to_llvm.h"
-#include "ac_llvm_build.h"
-#include "ac_llvm_util.h"
-#include "ac_binary.h"
-#include "sid.h"
-#include "nir/nir.h"
-#include "nir/nir_deref.h"
-#include "util/bitscan.h"
-#include "util/u_math.h"
-#include "ac_shader_abi.h"
-#include "ac_shader_util.h"
-
-struct ac_nir_context {
-       struct ac_llvm_context ac;
-       struct ac_shader_abi *abi;
-
-       gl_shader_stage stage;
-       shader_info *info;
-
-       LLVMValueRef *ssa_defs;
-
-       LLVMValueRef scratch;
-       LLVMValueRef constant_data;
-
-       struct hash_table *defs;
-       struct hash_table *phis;
-       struct hash_table *vars;
-
-       LLVMValueRef main_function;
-       LLVMBasicBlockRef continue_block;
-       LLVMBasicBlockRef break_block;
-
-       int num_locals;
-       LLVMValueRef *locals;
-};
-
-static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
-                                    nir_deref_instr *deref_instr,
-                                    enum ac_descriptor_type desc_type,
-                                    const nir_instr *instr,
-                                    bool image, bool write);
-
-static void
-build_store_values_extended(struct ac_llvm_context *ac,
-                            LLVMValueRef *values,
-                            unsigned value_count,
-                            unsigned value_stride,
-                            LLVMValueRef vec)
-{
-       LLVMBuilderRef builder = ac->builder;
-       unsigned i;
-
-       for (i = 0; i < value_count; i++) {
-               LLVMValueRef ptr = values[i * value_stride];
-               LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
-               LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
-               LLVMBuildStore(builder, value, ptr);
-       }
-}
-
-static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
-                                const nir_ssa_def *def)
-{
-       LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
-       if (def->num_components > 1) {
-               type = LLVMVectorType(type, def->num_components);
-       }
-       return type;
-}
-
-static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
-{
-       assert(src.is_ssa);
-       return nir->ssa_defs[src.ssa->index];
-}
-
-static LLVMValueRef
-get_memory_ptr(struct ac_nir_context *ctx, nir_src src)
-{
-       LLVMValueRef ptr = get_src(ctx, src);
-       ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
-       int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
-
-       return LLVMBuildBitCast(ctx->ac.builder, ptr,
-                               LLVMPointerType(ctx->ac.i32, addr_space), "");
-}
-
-static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
-                                   const struct nir_block *b)
-{
-       struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
-       return (LLVMBasicBlockRef)entry->data;
-}
-
-static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
-                                nir_alu_src src,
-                                unsigned num_components)
-{
-       LLVMValueRef value = get_src(ctx, src.src);
-       bool need_swizzle = false;
-
-       assert(value);
-       unsigned src_components = ac_get_llvm_num_components(value);
-       for (unsigned i = 0; i < num_components; ++i) {
-               assert(src.swizzle[i] < src_components);
-               if (src.swizzle[i] != i)
-                       need_swizzle = true;
-       }
-
-       if (need_swizzle || num_components != src_components) {
-               LLVMValueRef masks[] = {
-                   LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
-                   LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
-                   LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
-                   LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
-
-               if (src_components > 1 && num_components == 1) {
-                       value = LLVMBuildExtractElement(ctx->ac.builder, value,
-                                                       masks[0], "");
-               } else if (src_components == 1 && num_components > 1) {
-                       LLVMValueRef values[] = {value, value, value, value};
-                       value = ac_build_gather_values(&ctx->ac, values, num_components);
-               } else {
-                       LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
-                       value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
-                                                      swizzle, "");
-               }
-       }
-       assert(!src.negate);
-       assert(!src.abs);
-       return value;
-}
-
-static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
-                                 LLVMIntPredicate pred, LLVMValueRef src0,
-                                 LLVMValueRef src1)
-{
-       LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
-       return LLVMBuildSelect(ctx->builder, result,
-                              LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
-                              ctx->i32_0, "");
-}
-
-static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
-                                   LLVMRealPredicate pred, LLVMValueRef src0,
-                                   LLVMValueRef src1)
-{
-       LLVMValueRef result;
-       src0 = ac_to_float(ctx, src0);
-       src1 = ac_to_float(ctx, src1);
-       result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
-       return LLVMBuildSelect(ctx->builder, result,
-                              LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
-                              ctx->i32_0, "");
-}
-
-static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
-                                        const char *intrin,
-                                        LLVMTypeRef result_type,
-                                        LLVMValueRef src0)
-{
-       char name[64];
-       LLVMValueRef params[] = {
-               ac_to_float(ctx, src0),
-       };
-
-       ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
-                                                ac_get_elem_bits(ctx, result_type));
-       assert(length < sizeof(name));
-       return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
-}
-
-static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
-                                      const char *intrin,
-                                      LLVMTypeRef result_type,
-                                      LLVMValueRef src0, LLVMValueRef src1)
-{
-       char name[64];
-       LLVMValueRef params[] = {
-               ac_to_float(ctx, src0),
-               ac_to_float(ctx, src1),
-       };
-
-       ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
-                                                ac_get_elem_bits(ctx, result_type));
-       assert(length < sizeof(name));
-       return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
-}
-
-static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
-                                        const char *intrin,
-                                        LLVMTypeRef result_type,
-                                        LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
-{
-       char name[64];
-       LLVMValueRef params[] = {
-               ac_to_float(ctx, src0),
-               ac_to_float(ctx, src1),
-               ac_to_float(ctx, src2),
-       };
-
-       ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
-                                                ac_get_elem_bits(ctx, result_type));
-       assert(length < sizeof(name));
-       return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
-}
-
-static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
-                              LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
-{
-       assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
-
-       LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
-                                      ctx->i32_0, "");
-       return LLVMBuildSelect(ctx->builder, v,
-                              ac_to_integer_or_pointer(ctx, src1),
-                              ac_to_integer_or_pointer(ctx, src2), "");
-}
-
-static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
-                             LLVMValueRef src0)
-{
-       return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
-}
-
-static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
-                                   const char *intrin,
-                                   LLVMValueRef src0, LLVMValueRef src1)
-{
-       LLVMTypeRef ret_type;
-       LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
-       LLVMValueRef res;
-       LLVMValueRef params[] = { src0, src1 };
-       ret_type = LLVMStructTypeInContext(ctx->context, types,
-                                          2, true);
-
-       res = ac_build_intrinsic(ctx, intrin, ret_type,
-                                params, 2, AC_FUNC_ATTR_READNONE);
-
-       res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
-       res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
-       return res;
-}
-
-static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
-                            LLVMValueRef src0,
-                            unsigned bitsize)
-{
-       LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0,
-                                          LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""),
-                                          "");
-       result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, "");
-
-       switch (bitsize) {
-       case 16:
-               return LLVMBuildFPTrunc(ctx->builder, result, ctx->f16, "");
-       case 32:
-               return result;
-       case 64:
-               return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
-       default:
-               unreachable("Unsupported bit size.");
-       }
-}
-
-static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
-                            LLVMValueRef src0)
-{
-       src0 = ac_to_float(ctx, src0);
-       LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
-       return LLVMBuildSExt(ctx->builder,
-                            LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""),
-                            ctx->i32, "");
-}
-
-static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
-                            LLVMValueRef src0,
-                            unsigned bitsize)
-{
-       LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
-
-       switch (bitsize) {
-       case 8:
-               return LLVMBuildTrunc(ctx->builder, result, ctx->i8, "");
-       case 16:
-               return LLVMBuildTrunc(ctx->builder, result, ctx->i16, "");
-       case 32:
-               return result;
-       case 64:
-               return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
-       default:
-               unreachable("Unsupported bit size.");
-       }
-}
-
-static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
-                            LLVMValueRef src0)
-{
-       LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
-       return LLVMBuildSExt(ctx->builder,
-                            LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""),
-                            ctx->i32, "");
-}
-
-static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx,
-                              LLVMValueRef src0)
-{
-       LLVMValueRef result;
-       LLVMValueRef cond = NULL;
-
-       src0 = ac_to_float(ctx, src0);
-       result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
-
-       if (ctx->chip_class >= GFX8) {
-               LLVMValueRef args[2];
-               /* Check if the result is a denormal - and flush to 0 if so. */
-               args[0] = result;
-               args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
-               cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
-       }
-
-       /* need to convert back up to f32 */
-       result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
-
-       if (ctx->chip_class >= GFX8)
-               result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
-       else {
-               /* for GFX6-GFX7 */
-               /* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
-                * so compare the result and flush to 0 if it's smaller.
-                */
-               LLVMValueRef temp, cond2;
-               temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
-               cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
-                                    LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
-                                    temp, "");
-               cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
-                                     temp, ctx->f32_0, "");
-               cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
-               result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
-       }
-       return result;
-}
-
-static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
-                                  LLVMValueRef src0, LLVMValueRef src1)
-{
-       LLVMValueRef dst64, result;
-       src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
-       src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
-
-       dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
-       dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
-       result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
-       return result;
-}
-
-static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
-                                  LLVMValueRef src0, LLVMValueRef src1)
-{
-       LLVMValueRef dst64, result;
-       src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
-       src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
-
-       dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
-       dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
-       result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
-       return result;
-}
-
-static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx,
-                            LLVMValueRef bits, LLVMValueRef offset)
-{
-       /* mask = ((1 << bits) - 1) << offset */
-       return LLVMBuildShl(ctx->builder,
-                           LLVMBuildSub(ctx->builder,
-                                        LLVMBuildShl(ctx->builder,
-                                                     ctx->i32_1,
-                                                     bits, ""),
-                                        ctx->i32_1, ""),
-                           offset, "");
-}
-
-/*
- * Select bits from insert where mask is set and from base elsewhere:
- *   (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
- * The right-hand form is the one emitted, because the LLVM backend can
- * convert it to V_BFI.
- */
-static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx,
-                                        LLVMValueRef mask, LLVMValueRef insert,
-                                        LLVMValueRef base)
-{
-       LLVMValueRef diff = LLVMBuildXor(ctx->builder, insert, base, "");
-       LLVMValueRef masked_diff = LLVMBuildAnd(ctx->builder, mask, diff, "");
-
-       return LLVMBuildXor(ctx->builder, base, masked_diff, "");
-}
-
-/*
- * Pass src0 through ac_to_float, extract elements 0 and 1, hand the pair
- * to the supplied pack callback, and bitcast the callback's result to
- * ctx->i32.
- */
-static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx,
-                                  LLVMValueRef src0,
-                                  LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
-                                                       LLVMValueRef args[2]))
-{
-       LLVMValueRef lanes[2];
-       LLVMValueRef vec = ac_to_float(ctx, src0);
-       LLVMValueRef packed;
-
-       lanes[0] = LLVMBuildExtractElement(ctx->builder, vec, ctx->i32_0, "");
-       lanes[1] = LLVMBuildExtractElement(ctx->builder, vec, ctx->i32_1, "");
-       packed = pack(ctx, lanes);
-
-       return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
-}
-
-/*
- * Split src0 into two 16-bit pieces, reinterpret each as ctx->f16, extend
- * each to ctx->f32, and gather the two results with
- * ac_build_gather_values. Element 0 comes from the low 16 bits, element 1
- * from src0 shifted right by 16.
- */
-static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
-                                         LLVMValueRef src0)
-{
-       LLVMValueRef shift = LLVMConstInt(ctx->i32, 16, false);
-       LLVMValueRef halves[2];
-
-       for (int half = 0; half < 2; half++) {
-               LLVMValueRef bits = src0;
-
-               /* Only the second element needs the upper bits moved down. */
-               if (half == 1)
-                       bits = LLVMBuildLShr(ctx->builder, bits, shift, "");
-
-               bits = LLVMBuildTrunc(ctx->builder, bits, ctx->i16, "");
-               bits = LLVMBuildBitCast(ctx->builder, bits, ctx->f16, "");
-               halves[half] = LLVMBuildFPExt(ctx->builder, bits, ctx->f32, "");
-       }
-
-       return ac_build_gather_values(ctx, halves, 2);
-}
-
-/*
- * Emit a screen-space derivative for one of the nir_op_fddx* / nir_op_fddy*
- * opcodes by choosing a thread-id mask and a neighbour index for
- * ac_build_ddxy.
- */
-static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
-                             nir_op op,
-                             LLVMValueRef src0)
-{
-       unsigned tid_mask;
-       int neighbour;
-
-       /* Only the fine variants get a per-axis mask; all others use TOP_LEFT. */
-       switch (op) {
-       case nir_op_fddx_fine:
-               tid_mask = AC_TID_MASK_LEFT;
-               break;
-       case nir_op_fddy_fine:
-               tid_mask = AC_TID_MASK_TOP;
-               break;
-       default:
-               tid_mask = AC_TID_MASK_TOP_LEFT;
-               break;
-       }
-
-       /* For DDX we want the next X pixel, for DDY the next Y pixel. */
-       neighbour = (op == nir_op_fddx_fine ||
-                    op == nir_op_fddx_coarse ||
-                    op == nir_op_fddx) ? 1 : 2;
-
-       return ac_build_ddxy(&ctx->ac, tid_mask, neighbour, src0);
-}
-
-static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
-{
-       LLVMValueRef src[4], result = NULL;
-       unsigned num_components = instr->dest.dest.ssa.num_components;
-       unsigned src_components;
-       LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
-
-       assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
-       switch (instr->op) {
-       case nir_op_vec2:
-       case nir_op_vec3:
-       case nir_op_vec4:
-               src_components = 1;
-               break;
-       case nir_op_pack_half_2x16:
-       case nir_op_pack_snorm_2x16:
-       case nir_op_pack_unorm_2x16:
-               src_components = 2;
-               break;
-       case nir_op_unpack_half_2x16:
-               src_components = 1;
-               break;
-       case nir_op_cube_face_coord:
-       case nir_op_cube_face_index:
-               src_components = 3;
-               break;
-       default:
-               src_components = num_components;
-               break;
-       }
-       for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
-               src[i] = get_alu_src(ctx, instr->src[i], src_components);
-
-       switch (instr->op) {
-       case nir_op_mov:
-               result = src[0];
-               break;
-       case nir_op_fneg:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
-               break;
-       case nir_op_ineg:
-               result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
-               break;
-       case nir_op_inot:
-               result = LLVMBuildNot(ctx->ac.builder, src[0], "");
-               break;
-       case nir_op_iadd:
-               result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_fadd:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               src[1] = ac_to_float(&ctx->ac, src[1]);
-               result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_fsub:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               src[1] = ac_to_float(&ctx->ac, src[1]);
-               result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_isub:
-               result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_imul:
-               result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_imod:
-               result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_umod:
-               result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_irem:
-               result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_idiv:
-               result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_udiv:
-               result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_fmul:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               src[1] = ac_to_float(&ctx->ac, src[1]);
-               result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_frcp:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]);
-               break;
-       case nir_op_iand:
-               result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_ior:
-               result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_ixor:
-               result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_ishl:
-               if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
-                       src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
-                                              LLVMTypeOf(src[0]), "");
-               else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
-                       src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
-                                               LLVMTypeOf(src[0]), "");
-               result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_ishr:
-               if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
-                       src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
-                                              LLVMTypeOf(src[0]), "");
-               else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
-                       src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
-                                               LLVMTypeOf(src[0]), "");
-               result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_ushr:
-               if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
-                       src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
-                                              LLVMTypeOf(src[0]), "");
-               else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
-                       src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
-                                               LLVMTypeOf(src[0]), "");
-               result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
-               break;
-       case nir_op_ilt32:
-               result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
-               break;
-       case nir_op_ine32:
-               result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
-               break;
-       case nir_op_ieq32:
-               result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
-               break;
-       case nir_op_ige32:
-               result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
-               break;
-       case nir_op_ult32:
-               result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
-               break;
-       case nir_op_uge32:
-               result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
-               break;
-       case nir_op_feq32:
-               result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
-               break;
-       case nir_op_fne32:
-               result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
-               break;
-       case nir_op_flt32:
-               result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
-               break;
-       case nir_op_fge32:
-               result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
-               break;
-       case nir_op_fabs:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               break;
-       case nir_op_iabs:
-               result = emit_iabs(&ctx->ac, src[0]);
-               break;
-       case nir_op_imax:
-               result = ac_build_imax(&ctx->ac, src[0], src[1]);
-               break;
-       case nir_op_imin:
-               result = ac_build_imin(&ctx->ac, src[0], src[1]);
-               break;
-       case nir_op_umax:
-               result = ac_build_umax(&ctx->ac, src[0], src[1]);
-               break;
-       case nir_op_umin:
-               result = ac_build_umin(&ctx->ac, src[0], src[1]);
-               break;
-       case nir_op_isign:
-               result = ac_build_isign(&ctx->ac, src[0],
-                                       instr->dest.dest.ssa.bit_size);
-               break;
-       case nir_op_fsign:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = ac_build_fsign(&ctx->ac, src[0],
-                                       instr->dest.dest.ssa.bit_size);
-               break;
-       case nir_op_ffloor:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               break;
-       case nir_op_ftrunc:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               break;
-       case nir_op_fceil:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               break;
-       case nir_op_fround_even:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
-                                             ac_to_float_type(&ctx->ac, def_type),src[0]);
-               break;
-       case nir_op_ffract:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = ac_build_fract(&ctx->ac, src[0],
-                                       instr->dest.dest.ssa.bit_size);
-               break;
-       case nir_op_fsin:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               break;
-       case nir_op_fcos:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               break;
-       case nir_op_fsqrt:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               break;
-       case nir_op_fexp2:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               break;
-       case nir_op_flog2:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               break;
-       case nir_op_frsq:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
-               break;
-       case nir_op_frexp_exp:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = ac_build_frexp_exp(&ctx->ac, src[0],
-                                           ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
-               if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
-                       result = LLVMBuildSExt(ctx->ac.builder, result,
-                                              ctx->ac.i32, "");
-               break;
-       case nir_op_frexp_sig:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = ac_build_frexp_mant(&ctx->ac, src[0],
-                                            instr->dest.dest.ssa.bit_size);
-               break;
-       case nir_op_fpow:
-               result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
-               break;
-       case nir_op_fmax:
-               result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
-               if (ctx->ac.chip_class < GFX9 &&
-                   instr->dest.dest.ssa.bit_size == 32) {
-                       /* Only pre-GFX9 chips do not flush denorms. */
-                       result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
-                                                     ac_to_float_type(&ctx->ac, def_type),
-                                                     result);
-               }
-               break;
-       case nir_op_fmin:
-               result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
-               if (ctx->ac.chip_class < GFX9 &&
-                   instr->dest.dest.ssa.bit_size == 32) {
-                       /* Only pre-GFX9 chips do not flush denorms. */
-                       result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
-                                                     ac_to_float_type(&ctx->ac, def_type),
-                                                     result);
-               }
-               break;
-       case nir_op_ffma:
-               /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
-               result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
-               break;
-       case nir_op_ldexp:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
-                       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
-               else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
-                       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
-               else
-                       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
-               break;
-       case nir_op_bfm:
-               result = emit_bfm(&ctx->ac, src[0], src[1]);
-               break;
-       case nir_op_bitfield_select:
-               result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
-               break;
-       case nir_op_ubfe:
-               result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
-               break;
-       case nir_op_ibfe:
-               result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
-               break;
-       case nir_op_bitfield_reverse:
-               result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
-               break;
-       case nir_op_bit_count:
-               result = ac_build_bit_count(&ctx->ac, src[0]);
-               break;
-       case nir_op_vec2:
-       case nir_op_vec3:
-       case nir_op_vec4:
-               for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
-                       src[i] = ac_to_integer(&ctx->ac, src[i]);
-               result = ac_build_gather_values(&ctx->ac, src, num_components);
-               break;
-       case nir_op_f2i8:
-       case nir_op_f2i16:
-       case nir_op_f2i32:
-       case nir_op_f2i64:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
-               break;
-       case nir_op_f2u8:
-       case nir_op_f2u16:
-       case nir_op_f2u32:
-       case nir_op_f2u64:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
-               break;
-       case nir_op_i2f16:
-       case nir_op_i2f32:
-       case nir_op_i2f64:
-               result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
-               break;
-       case nir_op_u2f16:
-       case nir_op_u2f32:
-       case nir_op_u2f64:
-               result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
-               break;
-       case nir_op_f2f16_rtz:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               if (LLVMTypeOf(src[0]) == ctx->ac.f64)
-                       src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
-               LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
-               result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
-               result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
-               break;
-       case nir_op_f2f16_rtne:
-       case nir_op_f2f16:
-       case nir_op_f2f32:
-       case nir_op_f2f64:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
-                       result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
-               else
-                       result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
-               break;
-       case nir_op_u2u8:
-       case nir_op_u2u16:
-       case nir_op_u2u32:
-       case nir_op_u2u64:
-               if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
-                       result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
-               else
-                       result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
-               break;
-       case nir_op_i2i8:
-       case nir_op_i2i16:
-       case nir_op_i2i32:
-       case nir_op_i2i64:
-               if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
-                       result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
-               else
-                       result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
-               break;
-       case nir_op_b32csel:
-               result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
-               break;
-       case nir_op_find_lsb:
-               result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
-               break;
-       case nir_op_ufind_msb:
-               result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
-               break;
-       case nir_op_ifind_msb:
-               result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
-               break;
-       case nir_op_uadd_carry:
-               result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
-               break;
-       case nir_op_usub_borrow:
-               result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
-               break;
-       case nir_op_b2f16:
-       case nir_op_b2f32:
-       case nir_op_b2f64:
-               result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
-               break;
-       case nir_op_f2b32:
-               result = emit_f2b(&ctx->ac, src[0]);
-               break;
-       case nir_op_b2i8:
-       case nir_op_b2i16:
-       case nir_op_b2i32:
-       case nir_op_b2i64:
-               result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
-               break;
-       case nir_op_i2b32:
-               result = emit_i2b(&ctx->ac, src[0]);
-               break;
-       case nir_op_fquantize2f16:
-               result = emit_f2f16(&ctx->ac, src[0]);
-               break;
-       case nir_op_umul_high:
-               result = emit_umul_high(&ctx->ac, src[0], src[1]);
-               break;
-       case nir_op_imul_high:
-               result = emit_imul_high(&ctx->ac, src[0], src[1]);
-               break;
-       case nir_op_pack_half_2x16:
-               result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
-               break;
-       case nir_op_pack_snorm_2x16:
-               result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
-               break;
-       case nir_op_pack_unorm_2x16:
-               result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
-               break;
-       case nir_op_unpack_half_2x16:
-               result = emit_unpack_half_2x16(&ctx->ac, src[0]);
-               break;
-       case nir_op_fddx:
-       case nir_op_fddy:
-       case nir_op_fddx_fine:
-       case nir_op_fddy_fine:
-       case nir_op_fddx_coarse:
-       case nir_op_fddy_coarse:
-               result = emit_ddxy(ctx, instr->op, src[0]);
-               break;
-
-       case nir_op_unpack_64_2x32_split_x: {
-               assert(ac_get_llvm_num_components(src[0]) == 1);
-               LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
-                                                   ctx->ac.v2i32,
-                                                   "");
-               result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
-                                                ctx->ac.i32_0, "");
-               break;
-       }
-
-       case nir_op_unpack_64_2x32_split_y: {
-               assert(ac_get_llvm_num_components(src[0]) == 1);
-               LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
-                                                   ctx->ac.v2i32,
-                                                   "");
-               result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
-                                                ctx->ac.i32_1, "");
-               break;
-       }
-
-       case nir_op_pack_64_2x32_split: {
-               LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
-               result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
-               break;
-       }
-
-       case nir_op_pack_32_2x16_split: {
-               LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
-               result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
-               break;
-       }
-
-       case nir_op_unpack_32_2x16_split_x: {
-               LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
-                                                   ctx->ac.v2i16,
-                                                   "